Commit | Line | Data |
---|---|---|
77241056 | 1 | /* |
2e2ba09e | 2 | * Copyright(c) 2015 - 2018 Intel Corporation. |
77241056 MM |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
77241056 MM |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
77241056 MM |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | ||
48 | #include <linux/io.h> | |
ec4274f1 DD |
49 | #include <rdma/rdma_vt.h> |
50 | #include <rdma/rdmavt_qp.h> | |
77241056 MM |
51 | |
52 | #include "hfi.h" | |
53 | #include "qp.h" | |
bb5df5f9 | 54 | #include "verbs_txreq.h" |
77241056 MM |
55 | #include "trace.h" |
56 | ||
57 | /* cut down ridiculously long IB macro names */ | |
b374e060 | 58 | #define OP(x) RC_OP(x) |
77241056 | 59 | |
48a615dc KW |
60 | static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, |
61 | struct rvt_swqe *wqe, | |
62 | struct hfi1_ibport *ibp); | |
63 | ||
895420dd | 64 | static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, |
77241056 MM |
65 | u32 psn, u32 pmtu) |
66 | { | |
67 | u32 len; | |
68 | ||
69 | len = delta_psn(psn, wqe->psn) * pmtu; | |
70 | ss->sge = wqe->sg_list[0]; | |
71 | ss->sg_list = wqe->sg_list + 1; | |
72 | ss->num_sge = wqe->wr.num_sge; | |
73 | ss->total_len = wqe->length; | |
1198fcea | 74 | rvt_skip_sge(ss, len, false); |
77241056 MM |
75 | return wqe->length - len; |
76 | } | |
77 | ||
/**
 * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device for this QP
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @ps: the xmit packet state
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are in the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
		       struct ib_other_headers *ohdr,
		       struct hfi1_pkt_state *ps)
{
	struct rvt_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0, bth2;
	/* BTH[1]: remote QPN plus the OPFN extended bit when enabled */
	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
	int middle = 0;
	u32 pmtu = qp->pmtu;
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	/* Don't send an ACK if we aren't supposed to. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto bail;

	if (priv->hdr_type == HFI1_PKT_TYPE_9B)
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
	else
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;

	/* Dispatch on what was last constructed, not last received. */
	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->rdma_sge.mr) {
			/* Drop the MR reference held for the completed read */
			rvt_put_mr(e->rdma_sge.mr);
			e->rdma_sge.mr = NULL;
		}
		/* FALLTHROUGH */
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has been sent instead of only being
		 * constructed.
		 */
		if (++qp->s_tail_ack_queue >
		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
			qp->s_tail_ack_queue = 0;
		/* FALLTHROUGH */
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & RVT_S_ACK_PENDING)
				goto normal;
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/*
			 * If a RDMA read response is being resent and
			 * we haven't seen the duplicate request yet,
			 * then stop sending the remaining responses the
			 * responder has seen until the requester re-sends it.
			 */
			len = e->rdma_sge.sge_length;
			if (len && !e->rdma_sge.mr) {
				qp->s_tail_ack_queue = qp->r_head_ack_queue;
				goto bail;
			}
			/* Copy SGE state in case we need to resend */
			ps->s_txreq->mr = e->rdma_sge.mr;
			if (ps->s_txreq->mr)
				rvt_get_mr(ps->s_txreq->mr);
			qp->s_ack_rdma_sge.sge = e->rdma_sge;
			qp->s_ack_rdma_sge.num_sge = 1;
			ps->s_txreq->ss = &qp->s_ack_rdma_sge;
			if (len > pmtu) {
				/* Multi-packet read response: send FIRST now */
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = mask_psn(qp->s_ack_rdma_psn++);
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			ps->s_txreq->ss = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = rvt_compute_aeth(qp);
			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = mask_psn(e->psn);
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* Continue streaming an in-progress multi-packet response */
		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
		if (ps->s_txreq->mr)
			rvt_get_mr(ps->s_txreq->mr);
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
		} else {
			/* Final packet: append AETH and mark the entry sent */
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = mask_psn(qp->s_ack_rdma_psn++);
		break;

	default:
normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_flags &= ~RVT_S_ACK_PENDING;
		ps->s_txreq->ss = NULL;
		if (qp->s_nak_state)
			/* Encode the pending NAK code into the AETH */
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
					    (qp->s_nak_state <<
					     IB_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = rvt_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = mask_psn(qp->s_ack_psn);
	}
	qp->s_rdma_ack_cnt++;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->s_cur_size = len;
	ps->s_txreq->hdr_dwords = hwords;
	hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
	return 1;

bail:
	qp->s_ack_state = OP(ACKNOWLEDGE);
	/*
	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
	 * RVT_S_RESP_PENDING
	 */
	smp_wmb();
	qp->s_flags &= ~(RVT_S_RESP_PENDING
				| RVT_S_ACK_PENDING
				| HFI1_S_AHG_VALID);
	return 0;
}
252 | ||
/**
 * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ps: the current packet state; on success ps->s_txreq holds the built
 *      request, on failure ps->s_txreq is released and set to NULL
 *
 * Assumes s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_other_headers *ohdr;
	struct rvt_sge_state *ss;
	struct rvt_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0 = 0, bth2;
	/* BTH[1]: remote QPN plus the OPFN extended bit when enabled */
	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
	u32 pmtu = qp->pmtu;
	char newreq;
	int middle = 0;
	int delta;

	lockdep_assert_held(&qp->s_lock);
	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	/* Pick the OTH location inside the 9B or 16B header layout */
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	} else {
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;
		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
	}

	/* Sending responses has higher priority over sending requests. */
	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
	    make_rc_ack(dev, qp, ohdr, ps))
		return 1;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		clear_ahg(qp);
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
				  IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
		/* will get called again */
		goto done_free_tx;
	}

	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
		goto bail;

	/* Limit how far ahead of the acknowledged PSNs we may build */
	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
		if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
			qp->s_flags |= RVT_S_WAIT_PSN;
			goto bail;
		}
		qp->s_sending_psn = qp->s_psn;
		qp->s_sending_hpsn = qp->s_psn - 1;
	}

	/* Send a request. */
	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
			goto bail;
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == READ_ONCE(qp->s_head)) {
				clear_ahg(qp);
				goto bail;
			}
			/*
			 * If a fence is requested, wait for previous
			 * RDMA read and atomic operations to finish.
			 */
			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
			    qp->s_num_rd_atomic) {
				qp->s_flags |= RVT_S_WAIT_FENCE;
				goto bail;
			}
			/*
			 * Local operations are processed immediately
			 * after all prior requests have completed
			 */
			if (wqe->wr.opcode == IB_WR_REG_MR ||
			    wqe->wr.opcode == IB_WR_LOCAL_INV) {
				int local_ops = 0;
				int err = 0;

				if (qp->s_last != qp->s_cur)
					goto bail;
				/* Consume the WQE without emitting a packet */
				if (++qp->s_cur == qp->s_size)
					qp->s_cur = 0;
				if (++qp->s_tail == qp->s_size)
					qp->s_tail = 0;
				if (!(wqe->wr.send_flags &
				      RVT_SEND_COMPLETION_ONLY)) {
					err = rvt_invalidate_rkey(
						qp,
						wqe->wr.ex.invalidate_rkey);
					local_ops = 1;
				}
				rvt_send_complete(qp, wqe,
						  err ? IB_WC_LOC_PROT_ERR
						      : IB_WC_SUCCESS);
				if (local_ops)
					atomic_dec(&qp->local_ops_pending);
				goto done_free_tx;
			}

			newreq = 1;
			qp->s_psn = wqe->psn;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = mask_psn(qp->s_psn);
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
		case IB_WR_SEND_WITH_INV:
			/* If no credit, return. */
			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
				goto bail;
			}
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND) {
				qp->s_state = OP(SEND_ONLY);
			} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			} else {
				qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
				/* Invalidate rkey comes after the BTH */
				ohdr->u.ieth = cpu_to_be32(
					wqe->wr.ex.invalidate_rkey);
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
				qp->s_lsn++;
			/* RDMA write does not consume SSN credit */
			goto no_flow_control;
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
				goto bail;
			}
no_flow_control:
			put_ib_reth_vaddr(
				wqe->rdma_wr.remote_addr,
				&ohdr->u.rc.reth);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / sizeof(u32);
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
				qp->s_state = OP(RDMA_WRITE_ONLY);
			} else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= IB_BTH_SOLICITED;
			}
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= RVT_S_WAIT_RDMAR;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
					qp->s_lsn++;
			}
			put_ib_reth_vaddr(
				wqe->rdma_wr.remote_addr,
				&ohdr->u.rc.reth);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			/* Read requests carry no payload of their own */
			ss = NULL;
			len = 0;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= RVT_S_WAIT_RDMAR;
					goto bail;
				}
				qp->s_num_rd_atomic++;
			}

			/* FALLTHROUGH */
		case IB_WR_OPFN:
			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
				qp->s_lsn++;
			/* OPFN rides on the COMPARE_SWAP wire opcode */
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
			    wqe->wr.opcode == IB_WR_OPFN) {
				qp->s_state = OP(COMPARE_SWAP);
				put_ib_ateth_swap(wqe->atomic_wr.swap,
						  &ohdr->u.atomic_eth);
				put_ib_ateth_compare(wqe->atomic_wr.compare_add,
						     &ohdr->u.atomic_eth);
			} else {
				qp->s_state = OP(FETCH_ADD);
				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
						  &ohdr->u.atomic_eth);
				put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
			}
			put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
					   &ohdr->u.atomic_eth);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->atomic_wr.rkey);
			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
			ss = NULL;
			len = 0;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		/* Latch the SGE state for (possible multi-packet) transmit */
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_sge.total_len = wqe->length;
		qp->s_len = wqe->length;
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			qp->s_psn = wqe->lpsn + 1;
		else
			qp->s_psn++;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
		 * thread to indicate a SEND needs to be restarted from an
		 * earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = mask_psn(qp->s_psn++);
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND) {
			qp->s_state = OP(SEND_LAST);
		} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		} else {
			qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
			/* invalidate data comes after the BTH */
			ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= IB_BTH_SOLICITED;
		bth2 |= IB_BTH_REQ_ACK;
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
		 * thread to indicate a RDMA write needs to be restarted from
		 * an earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = mask_psn(qp->s_psn++);
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
			qp->s_state = OP(RDMA_WRITE_LAST);
		} else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
		}
		bth2 |= IB_BTH_REQ_ACK;
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
		 * thread to indicate a RDMA read needs to be restarted from
		 * an earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		/* Re-issue the read for only the not-yet-received tail */
		len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
		put_ib_reth_vaddr(
			wqe->rdma_wr.remote_addr + len,
			&ohdr->u.rc.reth);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->rdma_wr.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
		qp->s_psn = wqe->lpsn + 1;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	qp->s_sending_hpsn = bth2;
	/* Request an ACK every HFI1_PSN_CREDIT packets within a request */
	delta = delta_psn(bth2, wqe->psn);
	if (delta && delta % HFI1_PSN_CREDIT == 0)
		bth2 |= IB_BTH_REQ_ACK;
	if (qp->s_flags & RVT_S_SEND_ONE) {
		qp->s_flags &= ~RVT_S_SEND_ONE;
		qp->s_flags |= RVT_S_WAIT_ACK;
		bth2 |= IB_BTH_REQ_ACK;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = ss;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(
		qp,
		ohdr,
		bth0 | (qp->s_state << 24),
		bth1,
		bth2,
		middle,
		ps);
	return 1;

done_free_tx:
	/* WQE consumed without a wire packet; caller will invoke us again */
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}
724 | ||
5b6cabb0 DH |
725 | static inline void hfi1_make_bth_aeth(struct rvt_qp *qp, |
726 | struct ib_other_headers *ohdr, | |
727 | u32 bth0, u32 bth1) | |
728 | { | |
729 | if (qp->r_nak_state) | |
730 | ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | | |
731 | (qp->r_nak_state << | |
732 | IB_AETH_CREDIT_SHIFT)); | |
733 | else | |
734 | ohdr->u.aeth = rvt_compute_aeth(qp); | |
735 | ||
736 | ohdr->bth[0] = cpu_to_be32(bth0); | |
737 | ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn); | |
738 | ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); | |
739 | } | |
740 | ||
bdaf96f6 | 741 | static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn) |
5b6cabb0 | 742 | { |
bdaf96f6 SS |
743 | struct rvt_qp *qp = packet->qp; |
744 | struct hfi1_ibport *ibp; | |
5b6cabb0 DH |
745 | unsigned long flags; |
746 | ||
747 | spin_lock_irqsave(&qp->s_lock, flags); | |
748 | if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) | |
749 | goto unlock; | |
bdaf96f6 | 750 | ibp = rcd_to_iport(packet->rcd); |
5b6cabb0 DH |
751 | this_cpu_inc(*ibp->rvp.rc_qacks); |
752 | qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; | |
753 | qp->s_nak_state = qp->r_nak_state; | |
754 | qp->s_ack_psn = qp->r_ack_psn; | |
755 | if (is_fecn) | |
756 | qp->s_flags |= RVT_S_ECN; | |
757 | ||
758 | /* Schedule the send tasklet. */ | |
759 | hfi1_schedule_send(qp); | |
760 | unlock: | |
761 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
762 | } | |
763 | ||
/*
 * hfi1_make_rc_ack_9B - build a 9B (IB-format) header for an inline RC ACK
 * @packet: the received packet that triggered the ACK
 * @opa_hdr: the header being constructed (9B variant is filled in)
 * @sc5: the 5-bit service class for this ACK
 * @is_fecn: true if a BECN must be reflected back in BTH[1]
 * @pbc_flags: out: PBC flag bits (DC_INFO / SC[4]) are OR'd in
 * @hwords: out: header length in 32-bit words
 * @nwords: unused for 9B packets (16B counterpart fills it in)
 */
static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
				       struct hfi1_opa_header *opa_hdr,
				       u8 sc5, bool is_fecn,
				       u64 *pbc_flags, u32 *hwords,
				       u32 *nwords)
{
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct ib_header *hdr = &opa_hdr->ibh;
	struct ib_other_headers *ohdr;
	u16 lrh0 = HFI1_LRH_BTH;
	u16 pkey;
	u32 bth0, bth1;

	opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
	ohdr = &hdr->u.oth;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
	*hwords = 6;

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		/* GRH present: account for it and switch to the .l layout */
		*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
					 rdma_ah_read_grh(&qp->remote_ah_attr),
					 *hwords - 2, SIZE_OF_CRC);
		ohdr = &hdr->u.l.oth;
		lrh0 = HFI1_LRH_GRH;
	}
	/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
	*pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);

	/* read pkey_index w/o lock (its atomic) */
	pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);

	lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
		(rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
		 IB_SL_SHIFT;

	hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
			 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
			 ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));

	bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	bth1 = (!!is_fecn) << IB_BECN_SHIFT;
	/*
	 * Inline ACKs go out without the use of the Verbs send engine, so
	 * we need to set the STL Verbs Extended bit here
	 */
	bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT;
	hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
}
816 | ||
/**
 * hfi1_make_rc_ack_16B - build a 16B-format RC ACK header
 * @packet: incoming packet that triggered the ACK (supplies qp and rcd)
 * @opa_hdr: header storage to fill; hdr_type is set to HFI1_PKT_TYPE_16B
 * @sc5: 5-bit service class for the outgoing packet
 * @is_fecn: true if a BECN should be returned to the sender
 * @pbc_flags: PBC flags, updated to request bypass + ICRC insertion
 * @hwords: set to the header length in 32-bit words
 * @nwords: set to the CRC/LT/pad tail length in 32-bit words
 */
static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
					struct hfi1_opa_header *opa_hdr,
					u8 sc5, bool is_fecn,
					u64 *pbc_flags, u32 *hwords,
					u32 *nwords)
{
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_16b_header *hdr = &opa_hdr->opah;
	struct ib_other_headers *ohdr;
	u32 bth0, bth1 = 0;
	u16 len, pkey;
	bool becn = is_fecn;
	u8 l4 = OPA_16B_L4_IB_LOCAL;
	u8 extra_bytes;

	opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
	ohdr = &hdr->u.oth;
	/* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
	*hwords = 8;
	extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
	*nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
		/* GRH present: switch to the global L4 and the .l variant */
		*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
					 rdma_ah_read_grh(&qp->remote_ah_attr),
					 *hwords - 4, *nwords);
		ohdr = &hdr->u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
	}
	*pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;

	/* read pkey_index w/o lock (its atomic) */
	pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);

	/* Convert dwords to flits */
	len = (*hwords + *nwords) >> 1;

	hfi1_make_16b_hdr(hdr, ppd->lid |
			  (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
			  ((1 << ppd->lmc) - 1)),
			  opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
				      16B), len, pkey, becn, 0, l4, sc5);

	bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
	/* 16B carries the pad count in BTH0 bits 20-21 */
	bth0 |= extra_bytes << 20;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth1 = OPA_BTH_MIG_REQ;
	hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
}
869 | ||
/* Builder for a packet-format-specific RC ACK header, chosen by hdr_type */
typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
				 struct hfi1_opa_header *opa_hdr,
				 u8 sc5, bool is_fecn,
				 u64 *pbc_flags, u32 *hwords,
				 u32 *nwords);

/* We support only two types - 9B and 16B for now */
static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
};
881 | ||
77241056 MM |
/**
 * hfi1_send_rc_ack - Construct an ACK packet and send it
 * @packet: the packet being acknowledged (supplies the QP and receive ctxt)
 * @is_fecn: true if a BECN should be carried back in the ACK
 *
 * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and send engine.
 *
 * The ACK is built on the stack and pushed out inline through PIO;
 * if no PIO buffer is available the ACK is deferred to the send engine
 * via hfi1_queue_rc_ack().
 */
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	u64 pbc, pbc_flags = 0;
	u32 hwords = 0;
	u32 nwords = 0;
	u32 plen;
	struct pio_buf *pbuf;
	struct hfi1_opa_header opa_hdr;

	/* clear the defer count */
	qp->r_adefered = 0;

	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
	if (qp->s_flags & RVT_S_RESP_PENDING) {
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}

	/* Ensure s_rdma_ack_cnt changes are committed */
	if (qp->s_rdma_ack_cnt) {
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}

	/* Don't try to send ACKs if the link isn't ACTIVE */
	if (driver_lstate(ppd) != IB_PORT_ACTIVE)
		return;

	/* Make the appropriate header (9B or 16B, per the QP's hdr_type) */
	hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
					     &pbc_flags, &hwords, &nwords);

	plen = 2 /* PBC */ + hwords + nwords;
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
			 sc_to_vlt(ppd->dd, sc5), plen);
	pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
	if (!pbuf) {
		/*
		 * We have no room to send at the moment.  Pass
		 * responsibility for sending the ACK to the send engine
		 * so that when enough buffer space becomes available,
		 * the ACK is sent ahead of other outgoing packets.
		 */
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}
	trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &opa_hdr, ib_is_sc5(sc5));

	/* write the pbc and data */
	ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
				 (priv->hdr_type == HFI1_PKT_TYPE_9B ?
				 (void *)&opa_hdr.ibh :
				 (void *)&opa_hdr.opah), hwords);
	return;
}
952 | ||
/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from hfi1_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 *
 * Walks the send queue from s_acked to locate the WQE containing @psn,
 * repositions s_cur there, and sets s_state so hfi1_make_rc_req() resumes
 * at the right point (mid-request restarts use the RDMA_READ_RESPONSE_*
 * opcodes as internal "resume in the middle" markers).
 */
static void reset_psn(struct rvt_qp *qp, u32 psn)
{
	u32 n = qp->s_acked;
	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
	u32 opcode;

	lockdep_assert_held(&qp->s_lock);
	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (cmp_psn(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = rvt_get_swqe_ptr(qp, n);
		diff = cmp_psn(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See hfi1_make_rc_req().
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since its only
		 * one PSN per req.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
	/*
	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
	 * asynchronously before the send engine can get scheduled.
	 * Doing it in hfi1_make_rc_req() is too late.
	 */
	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
		qp->s_flags |= RVT_S_WAIT_PSN;
	/* force a fresh header build; cached AHG state is now stale */
	qp->s_flags &= ~HFI1_S_AHG_VALID;
}
1044 | ||
1045 | /* | |
1046 | * Back up requester to resend the last un-ACKed request. | |
1047 | * The QP r_lock and s_lock should be held and interrupts disabled. | |
1048 | */ | |
56acbbfb | 1049 | void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait) |
77241056 | 1050 | { |
48a615dc | 1051 | struct hfi1_qp_priv *priv = qp->priv; |
83693bd1 | 1052 | struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); |
77241056 MM |
1053 | struct hfi1_ibport *ibp; |
1054 | ||
68e78b3d MM |
1055 | lockdep_assert_held(&qp->r_lock); |
1056 | lockdep_assert_held(&qp->s_lock); | |
77241056 MM |
1057 | if (qp->s_retry == 0) { |
1058 | if (qp->s_mig_state == IB_MIG_ARMED) { | |
1059 | hfi1_migrate_qp(qp); | |
1060 | qp->s_retry = qp->s_retry_cnt; | |
1061 | } else if (qp->s_last == qp->s_acked) { | |
48a615dc KW |
1062 | /* |
1063 | * We need special handling for the OPFN request WQEs as | |
1064 | * they are not allowed to generate real user errors | |
1065 | */ | |
1066 | if (wqe->wr.opcode == IB_WR_OPFN) { | |
1067 | struct hfi1_ibport *ibp = | |
1068 | to_iport(qp->ibqp.device, qp->port_num); | |
1069 | /* | |
1070 | * Call opfn_conn_reply() with capcode and | |
1071 | * remaining data as 0 to close out the | |
1072 | * current request | |
1073 | */ | |
1074 | opfn_conn_reply(qp, priv->opfn.curr); | |
1075 | wqe = do_rc_completion(qp, wqe, ibp); | |
1076 | qp->s_flags &= ~RVT_S_WAIT_ACK; | |
1077 | } else { | |
1078 | rvt_send_complete(qp, wqe, | |
1079 | IB_WC_RETRY_EXC_ERR); | |
1080 | rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); | |
1081 | } | |
77241056 | 1082 | return; |
e490974e | 1083 | } else { /* need to handle delayed completion */ |
77241056 | 1084 | return; |
e490974e JJ |
1085 | } |
1086 | } else { | |
77241056 | 1087 | qp->s_retry--; |
e490974e | 1088 | } |
77241056 MM |
1089 | |
1090 | ibp = to_iport(qp->ibqp.device, qp->port_num); | |
1091 | if (wqe->wr.opcode == IB_WR_RDMA_READ) | |
4eb06882 | 1092 | ibp->rvp.n_rc_resends++; |
77241056 | 1093 | else |
4eb06882 | 1094 | ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); |
77241056 | 1095 | |
54d10c1e DD |
1096 | qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | |
1097 | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN | | |
1098 | RVT_S_WAIT_ACK); | |
77241056 | 1099 | if (wait) |
54d10c1e | 1100 | qp->s_flags |= RVT_S_SEND_ONE; |
77241056 MM |
1101 | reset_psn(qp, psn); |
1102 | } | |
1103 | ||
77241056 MM |
1104 | /* |
1105 | * Set qp->s_sending_psn to the next PSN after the given one. | |
1106 | * This would be psn+1 except when RDMA reads are present. | |
1107 | */ | |
895420dd | 1108 | static void reset_sending_psn(struct rvt_qp *qp, u32 psn) |
77241056 | 1109 | { |
895420dd | 1110 | struct rvt_swqe *wqe; |
77241056 MM |
1111 | u32 n = qp->s_last; |
1112 | ||
68e78b3d | 1113 | lockdep_assert_held(&qp->s_lock); |
77241056 MM |
1114 | /* Find the work request corresponding to the given PSN. */ |
1115 | for (;;) { | |
83693bd1 | 1116 | wqe = rvt_get_swqe_ptr(qp, n); |
77241056 MM |
1117 | if (cmp_psn(psn, wqe->lpsn) <= 0) { |
1118 | if (wqe->wr.opcode == IB_WR_RDMA_READ) | |
1119 | qp->s_sending_psn = wqe->lpsn + 1; | |
1120 | else | |
1121 | qp->s_sending_psn = psn + 1; | |
1122 | break; | |
1123 | } | |
1124 | if (++n == qp->s_size) | |
1125 | n = 0; | |
1126 | if (n == qp->s_tail) | |
1127 | break; | |
1128 | } | |
1129 | } | |
1130 | ||
/*
 * This should be called with the QP s_lock held and interrupts disabled.
 *
 * Invoked after a packet has gone out on the wire: locates the BTH in the
 * just-sent 9B or 16B header, accounts for response packets, advances the
 * sending PSN, retires completed WQEs, and restarts the sender if it was
 * waiting for these sends to finish (RVT_S_WAIT_PSN).
 */
void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
{
	struct ib_other_headers *ohdr;
	struct hfi1_qp_priv *priv = qp->priv;
	struct rvt_swqe *wqe;
	struct ib_header *hdr = NULL;
	struct hfi1_16b_header *hdr_16b = NULL;
	u32 opcode;
	u32 psn;

	lockdep_assert_held(&qp->s_lock);
	if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
		return;

	/* Find out where the BTH is */
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		hdr = &opah->ibh;
		if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
			ohdr = &hdr->u.oth;
		else
			ohdr = &hdr->u.l.oth;
	} else {
		u8 l4;

		hdr_16b = &opah->opah;
		l4 = hfi1_16B_get_l4(hdr_16b);
		if (l4 == OPA_16B_L4_IB_LOCAL)
			ohdr = &hdr_16b->u.oth;
		else
			ohdr = &hdr_16b->u.l.oth;
	}

	opcode = ib_bth_get_opcode(ohdr);
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		/* responder-side packet: just retire the outstanding ack */
		WARN_ON(!qp->s_rdma_ack_cnt);
		qp->s_rdma_ack_cnt--;
		return;
	}

	psn = ib_bth_get_psn(ohdr);
	reset_sending_psn(qp, psn);

	/*
	 * Start timer after a packet requesting an ACK has been sent and
	 * there are still requests that haven't been acked.
	 */
	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
	    !(qp->s_flags &
	      (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		rvt_add_retry_timer(qp);

	while (qp->s_last != qp->s_acked) {
		u32 s_last;

		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
			break;
		rvt_qp_wqe_unreserve(qp, wqe);
		s_last = qp->s_last;
		trace_hfi1_qp_send_completion(qp, wqe, s_last);
		if (++s_last >= qp->s_size)
			s_last = 0;
		qp->s_last = s_last;
		/* see post_send() */
		barrier();
		rvt_put_swqe(wqe);
		rvt_qp_swqe_complete(qp,
				     wqe,
				     ib_hfi1_wc_opcode[wqe->wr.opcode],
				     IB_WC_SUCCESS);
	}
	/*
	 * If we were waiting for sends to complete before re-sending,
	 * and they are now complete, restart sending.
	 */
	trace_hfi1_sendcomplete(qp, psn);
	if (qp->s_flags & RVT_S_WAIT_PSN &&
	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
		qp->s_flags &= ~RVT_S_WAIT_PSN;
		qp->s_sending_psn = qp->s_psn;
		qp->s_sending_hpsn = qp->s_psn - 1;
		hfi1_schedule_send(qp);
	}
}
1221 | ||
/* Record the most recent PSN processed as ACKed/NAKed (see do_rc_ack()). */
static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
	qp->s_last_psn = psn;
}
1226 | ||
/*
 * Generate a SWQE completion.
 * This is similar to hfi1_send_complete but has to check to be sure
 * that the SGEs are not being referenced if the SWQE is being resent.
 *
 * Returns the next WQE to process (the new s_acked entry).
 * Must be called with the QP s_lock held.
 */
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
					 struct rvt_swqe *wqe,
					 struct hfi1_ibport *ibp)
{
	lockdep_assert_held(&qp->s_lock);
	/*
	 * Don't decrement refcount and don't generate a
	 * completion if the SWQE is being resent until the send
	 * is finished.
	 */
	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
		u32 s_last;

		rvt_put_swqe(wqe);
		rvt_qp_wqe_unreserve(qp, wqe);
		s_last = qp->s_last;
		trace_hfi1_qp_send_completion(qp, wqe, s_last);
		if (++s_last >= qp->s_size)
			s_last = 0;
		qp->s_last = s_last;
		/* see post_send() */
		barrier();
		rvt_qp_swqe_complete(qp,
				     wqe,
				     ib_hfi1_wc_opcode[wqe->wr.opcode],
				     IB_WC_SUCCESS);
	} else {
		/* still being (re)sent: defer the completion */
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
		/*
		 * If send progress not running attempt to progress
		 * SDMA queue.
		 */
		if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
			struct sdma_engine *engine;
			u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
			u8 sc5;

			/* For now use sc to find engine */
			sc5 = ibp->sl_to_sc[sl];
			engine = qp_to_sdma_engine(qp, sc5);
			sdma_engine_progress_schedule(engine);
		}
	}

	qp->s_retry = qp->s_retry_cnt;
	update_last_psn(qp, wqe->lpsn);

	/*
	 * If we are completing a request which is in the process of
	 * being resent, we can stop re-sending it since we know the
	 * responder has already seen it.
	 */
	if (qp->s_acked == qp->s_cur) {
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		qp->s_acked = qp->s_cur;
		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
		if (qp->s_acked != qp->s_tail) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		}
	} else {
		if (++qp->s_acked >= qp->s_size)
			qp->s_acked = 0;
		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
			qp->s_draining = 0;
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	}
	return wqe;
}
1305 | ||
/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the ACK extended transport header value
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 * @val: atomic-ACK payload, written back to the atomic WQE's first SGE
 * @rcd: the receive context the ACK arrived on
 *
 * This is called from rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * May be called at interrupt level, with the QP s_lock held.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val, struct hfi1_ctxtdata *rcd)
{
	struct hfi1_ibport *ibp;
	enum ib_wc_status status;
	struct rvt_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	lockdep_assert_held(&qp->s_lock);
	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> IB_AETH_NAK_SHIFT)
		ack_psn--;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	ibp = rcd_to_iport(rcd);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
		/*
		 * RDMA_READ_RESPONSE_ONLY is a special case since
		 * we want to generate completion events for everything
		 * before the RDMA read, copy the data, then generate
		 * the completion for the read.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
		    diff == 0) {
			ret = 1;
			goto bail_stop;
		}
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
			/* Retry this request. */
			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
				qp->r_flags |= RVT_R_RDMAR_SEQ;
				hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
				if (list_empty(&qp->rspwait)) {
					qp->r_flags |= RVT_R_RSP_SEND;
					rvt_get_qp(qp);
					list_add_tail(&qp->rspwait,
						      &rcd->qp_wait_list);
				}
			}
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail_stop;
		}
		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			/* deliver the atomic's prior value to the user SGE */
			u64 *vaddr = wqe->sg_list[0].vaddr;
			*vaddr = val;
		}
		if (wqe->wr.opcode == IB_WR_OPFN)
			opfn_conn_reply(qp, val);

		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
						 RVT_S_WAIT_ACK);
				hfi1_schedule_send(qp);
			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
						 RVT_S_WAIT_ACK);
				hfi1_schedule_send(qp);
			}
		}
		wqe = do_rc_completion(qp, wqe, ibp);
		if (qp->s_acked == qp->s_tail)
			break;
	}

	/* AETH bits 31:29 carry the ACK/NAK class */
	switch (aeth >> IB_AETH_NAK_SHIFT) {
	case 0:         /* ACK */
		this_cpu_inc(*ibp->rvp.rc_acks);
		if (qp->s_acked != qp->s_tail) {
			/*
			 * We are expecting more ACKs so
			 * mod the retry timer.
			 */
			rvt_mod_retry_timer(qp);
			/*
			 * We can stop re-sending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (cmp_psn(qp->s_psn, psn) <= 0)
				reset_psn(qp, psn + 1);
		} else {
			/* No more acks - kill all timers */
			rvt_stop_rc_timers(qp);
			if (cmp_psn(qp->s_psn, psn) <= 0) {
				qp->s_state = OP(SEND_LAST);
				qp->s_psn = psn + 1;
			}
		}
		if (qp->s_flags & RVT_S_WAIT_ACK) {
			qp->s_flags &= ~RVT_S_WAIT_ACK;
			hfi1_schedule_send(qp);
		}
		rvt_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		return 1;

	case 1:         /* RNR NAK */
		ibp->rvp.n_rnr_naks++;
		if (qp->s_acked == qp->s_tail)
			goto bail_stop;
		if (qp->s_flags & RVT_S_WAIT_RNR)
			goto bail_stop;
		if (qp->s_rnr_retry == 0) {
			status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		/* retry count 7 means "retry forever"; don't decrement it */
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);

		reset_psn(qp, psn);

		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
		rvt_stop_rc_timers(qp);
		rvt_add_rnr_timer(qp, aeth);
		return 0;

	case 3:         /* NAK */
		if (qp->s_acked == qp->s_tail)
			goto bail_stop;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
			IB_AETH_CREDIT_MASK) {
		case 0: /* PSN sequence error */
			ibp->rvp.n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			hfi1_restart_rc(qp, psn, 0);
			hfi1_schedule_send(qp);
			break;

		case 1: /* Invalid Request */
			status = IB_WC_REM_INV_REQ_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 2: /* Remote Access Error */
			status = IB_WC_REM_ACCESS_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 3: /* Remote Operation Error */
			status = IB_WC_REM_OP_ERR;
			ibp->rvp.n_other_naks++;
class_b:
			if (qp->s_last == qp->s_acked) {
				rvt_send_complete(qp, wqe, status);
				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			}
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_retry = qp->s_retry_cnt;
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail_stop;

	default:                /* 2: reserved */
reserved:
		/* Ignore reserved NAK codes. */
		goto bail_stop;
	}
	/* cannot be reached  */
bail_stop:
	rvt_stop_rc_timers(qp);
	return ret;
}
1533 | ||
/*
 * We have seen an out of sequence RDMA read middle or last packet.
 * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
 *
 * Marks the QP for an RDMA-read restart (RVT_R_RDMAR_SEQ) and queues it
 * on the receive context's wait list so the resend is scheduled.
 * Called with the QP s_lock held.
 */
static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
			 struct hfi1_ctxtdata *rcd)
{
	struct rvt_swqe *wqe;

	lockdep_assert_held(&qp->s_lock);
	/* Remove QP from retry timer */
	rvt_stop_rc_timers(qp);

	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);

	while (cmp_psn(psn, wqe->lpsn) > 0) {
		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			break;
		wqe = do_rc_completion(qp, wqe, ibp);
	}

	ibp->rvp.n_rdma_seq++;
	qp->r_flags |= RVT_R_RDMAR_SEQ;
	hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
	if (list_empty(&qp->rspwait)) {
		qp->r_flags |= RVT_R_RSP_SEND;
		rvt_get_qp(qp);
		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
	}
}
1566 | ||
1567 | /** | |
1568 | * rc_rcv_resp - process an incoming RC response packet | |
5b6cabb0 | 1569 | * @packet: data packet information |
77241056 MM |
1570 | * |
1571 | * This is called from hfi1_rc_rcv() to process an incoming RC response | |
1572 | * packet for the given QP. | |
1573 | * Called at interrupt level. | |
1574 | */ | |
static void rc_rcv_resp(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp;
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_swqe *wqe;
	enum ib_wc_status status;
	unsigned long flags;
	int diff;
	u64 val;
	u32 aeth;
	u32 psn = ib_bth_get_psn(packet->ohdr);
	u32 pmtu = qp->pmtu;
	u16 hdrsize = packet->hlen;
	u8 opcode = packet->opcode;
	u8 pad = packet->pad;
	/* trailer overhead: pad + extra byte(s) + ICRC (SIZE_OF_CRC flits * 4) */
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	spin_lock_irqsave(&qp->s_lock, flags);
	trace_hfi1_ack(qp, psn);

	/* Ignore invalid responses. */
	if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = cmp_psn(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			aeth = be32_to_cpu(ohdr->u.aeth);
			/* top AETH bits == 0 means a plain ACK, not a NAK/RNR */
			if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
				rvt_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	/*
	 * Skip everything other than the PSN we expect, if we are waiting
	 * for a reply to a restarted RDMA read or atomic op.
	 */
	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
			goto ack_done;
		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
	}

	/* Nothing outstanding to complete against. */
	if (unlikely(qp->s_acked == qp->s_tail))
		goto ack_done;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	status = IB_WC_SUCCESS;

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		aeth = be32_to_cpu(ohdr->u.aeth);
		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
			val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
		else
			val = 0;
		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		/* do_rc_ack() may have advanced s_acked; re-read the wqe */
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_middle;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
			goto ack_seq_err;
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
read_middle:
		/* MIDDLE payload must be exactly one pmtu */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto ack_len_err;
		if (unlikely(pmtu >= qp->s_rdma_read_len))
			goto ack_len_err;

		/*
		 * We got a response so update the timeout.
		 * 4.096 usec. * (1 << qp->timeout)
		 */
		rvt_mod_retry_timer(qp);
		if (qp->s_flags & RVT_S_WAIT_ACK) {
			qp->s_flags &= ~RVT_S_WAIT_ACK;
			hfi1_schedule_send(qp);
		}

		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
			qp->s_retry = qp->s_retry_cnt;

		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.
		 */
		qp->s_rdma_read_len -= pmtu;
		update_last_psn(qp, psn);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
			     data, pmtu, false, false);
		/* lock already dropped; bypass the unlock at ack_done */
		goto bail;

	case OP(RDMA_READ_RESPONSE_ONLY):
		aeth = be32_to_cpu(ohdr->u.aeth);
		if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
			goto ack_done;
		/*
		 * Check that the data size is >= 0 && <= pmtu.
		 * Remember to account for ICRC (4).
		 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto ack_len_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_last;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
			goto ack_seq_err;
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + extra_bytes)))
			goto ack_len_err;
read_last:
		tlen -= hdrsize + extra_bytes;
		/* must consume exactly the remaining read length */
		if (unlikely(tlen != qp->s_rdma_read_len))
			goto ack_len_err;
		aeth = be32_to_cpu(ohdr->u.aeth);
		rvt_copy_sge(qp, &qp->s_rdma_read_sge,
			     data, tlen, false, false);
		WARN_ON(qp->s_rdma_read_sge.num_sge);
		/* return value deliberately ignored: ack processing is final here */
		(void)do_rc_ack(qp, aeth, psn,
				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
		goto ack_done;
	}

ack_op_err:
	status = IB_WC_LOC_QP_OP_ERR;
	goto ack_err;

ack_seq_err:
	ibp = rcd_to_iport(rcd);
	rdma_seq_err(qp, ibp, psn, rcd);
	goto ack_done;

ack_len_err:
	status = IB_WC_LOC_LEN_ERR;
ack_err:
	if (qp->s_last == qp->s_acked) {
		rvt_send_complete(qp, wqe, status);
		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	}
ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}
1756 | ||
2fd36865 | 1757 | static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, |
895420dd | 1758 | struct rvt_qp *qp) |
2fd36865 MM |
1759 | { |
1760 | if (list_empty(&qp->rspwait)) { | |
54d10c1e | 1761 | qp->r_flags |= RVT_R_RSP_NAK; |
4d6f85c3 | 1762 | rvt_get_qp(qp); |
2fd36865 MM |
1763 | list_add_tail(&qp->rspwait, &rcd->qp_wait_list); |
1764 | } | |
1765 | } | |
1766 | ||
895420dd | 1767 | static inline void rc_cancel_ack(struct rvt_qp *qp) |
7c091e5c | 1768 | { |
688f21c0 | 1769 | qp->r_adefered = 0; |
7c091e5c MM |
1770 | if (list_empty(&qp->rspwait)) |
1771 | return; | |
1772 | list_del_init(&qp->rspwait); | |
54d10c1e | 1773 | qp->r_flags &= ~RVT_R_RSP_NAK; |
4d6f85c3 | 1774 | rvt_put_qp(qp); |
7c091e5c MM |
1775 | } |
1776 | ||
77241056 MM |
1777 | /** |
1778 | * rc_rcv_error - process an incoming duplicate or error RC packet | |
1779 | * @ohdr: the other headers for this packet | |
1780 | * @data: the packet data | |
1781 | * @qp: the QP for this packet | |
1782 | * @opcode: the opcode for this packet | |
1783 | * @psn: the packet sequence number for this packet | |
1784 | * @diff: the difference between the PSN and the expected PSN | |
1785 | * | |
1786 | * This is called from hfi1_rc_rcv() to process an unexpected | |
1787 | * incoming RC packet for the given QP. | |
1788 | * Called at interrupt level. | |
1789 | * Return 1 if no more processing is needed; otherwise return 0 to | |
1790 | * schedule a response to be sent. | |
1791 | */ | |
261a4351 | 1792 | static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, |
17fb4f29 JJ |
1793 | struct rvt_qp *qp, u32 opcode, u32 psn, |
1794 | int diff, struct hfi1_ctxtdata *rcd) | |
77241056 | 1795 | { |
f3e862cb | 1796 | struct hfi1_ibport *ibp = rcd_to_iport(rcd); |
895420dd | 1797 | struct rvt_ack_entry *e; |
77241056 MM |
1798 | unsigned long flags; |
1799 | u8 i, prev; | |
1800 | int old_req; | |
1801 | ||
462b6b21 | 1802 | trace_hfi1_rcv_error(qp, psn); |
77241056 MM |
1803 | if (diff > 0) { |
1804 | /* | |
1805 | * Packet sequence error. | |
1806 | * A NAK will ACK earlier sends and RDMA writes. | |
1807 | * Don't queue the NAK if we already sent one. | |
1808 | */ | |
1809 | if (!qp->r_nak_state) { | |
4eb06882 | 1810 | ibp->rvp.n_rc_seqnak++; |
77241056 MM |
1811 | qp->r_nak_state = IB_NAK_PSN_ERROR; |
1812 | /* Use the expected PSN. */ | |
1813 | qp->r_ack_psn = qp->r_psn; | |
1814 | /* | |
1815 | * Wait to send the sequence NAK until all packets | |
1816 | * in the receive queue have been processed. | |
1817 | * Otherwise, we end up propagating congestion. | |
1818 | */ | |
2fd36865 | 1819 | rc_defered_ack(rcd, qp); |
77241056 MM |
1820 | } |
1821 | goto done; | |
1822 | } | |
1823 | ||
1824 | /* | |
1825 | * Handle a duplicate request. Don't re-execute SEND, RDMA | |
1826 | * write or atomic op. Don't NAK errors, just silently drop | |
1827 | * the duplicate request. Note that r_sge, r_len, and | |
1828 | * r_rcv_len may be in use so don't modify them. | |
1829 | * | |
1830 | * We are supposed to ACK the earliest duplicate PSN but we | |
1831 | * can coalesce an outstanding duplicate ACK. We have to | |
1832 | * send the earliest so that RDMA reads can be restarted at | |
1833 | * the requester's expected PSN. | |
1834 | * | |
1835 | * First, find where this duplicate PSN falls within the | |
1836 | * ACKs previously sent. | |
1837 | * old_req is true if there is an older response that is scheduled | |
1838 | * to be sent before sending this one. | |
1839 | */ | |
1840 | e = NULL; | |
1841 | old_req = 1; | |
4eb06882 | 1842 | ibp->rvp.n_rc_dupreq++; |
77241056 MM |
1843 | |
1844 | spin_lock_irqsave(&qp->s_lock, flags); | |
1845 | ||
1846 | for (i = qp->r_head_ack_queue; ; i = prev) { | |
1847 | if (i == qp->s_tail_ack_queue) | |
1848 | old_req = 0; | |
1849 | if (i) | |
1850 | prev = i - 1; | |
1851 | else | |
ddf922c3 | 1852 | prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device)); |
77241056 MM |
1853 | if (prev == qp->r_head_ack_queue) { |
1854 | e = NULL; | |
1855 | break; | |
1856 | } | |
1857 | e = &qp->s_ack_queue[prev]; | |
1858 | if (!e->opcode) { | |
1859 | e = NULL; | |
1860 | break; | |
1861 | } | |
1862 | if (cmp_psn(psn, e->psn) >= 0) { | |
1863 | if (prev == qp->s_tail_ack_queue && | |
1864 | cmp_psn(psn, e->lpsn) <= 0) | |
1865 | old_req = 0; | |
1866 | break; | |
1867 | } | |
1868 | } | |
1869 | switch (opcode) { | |
1870 | case OP(RDMA_READ_REQUEST): { | |
1871 | struct ib_reth *reth; | |
1872 | u32 offset; | |
1873 | u32 len; | |
1874 | ||
1875 | /* | |
1876 | * If we didn't find the RDMA read request in the ack queue, | |
1877 | * we can ignore this request. | |
1878 | */ | |
1879 | if (!e || e->opcode != OP(RDMA_READ_REQUEST)) | |
1880 | goto unlock_done; | |
1881 | /* RETH comes after BTH */ | |
1882 | reth = &ohdr->u.rc.reth; | |
1883 | /* | |
1884 | * Address range must be a subset of the original | |
1885 | * request and start on pmtu boundaries. | |
1886 | * We reuse the old ack_queue slot since the requester | |
1887 | * should not back up and request an earlier PSN for the | |
1888 | * same request. | |
1889 | */ | |
1890 | offset = delta_psn(psn, e->psn) * qp->pmtu; | |
1891 | len = be32_to_cpu(reth->length); | |
1892 | if (unlikely(offset + len != e->rdma_sge.sge_length)) | |
1893 | goto unlock_done; | |
1894 | if (e->rdma_sge.mr) { | |
895420dd | 1895 | rvt_put_mr(e->rdma_sge.mr); |
77241056 MM |
1896 | e->rdma_sge.mr = NULL; |
1897 | } | |
1898 | if (len != 0) { | |
1899 | u32 rkey = be32_to_cpu(reth->rkey); | |
261a4351 | 1900 | u64 vaddr = get_ib_reth_vaddr(reth); |
77241056 MM |
1901 | int ok; |
1902 | ||
895420dd DD |
1903 | ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, |
1904 | IB_ACCESS_REMOTE_READ); | |
77241056 MM |
1905 | if (unlikely(!ok)) |
1906 | goto unlock_done; | |
1907 | } else { | |
1908 | e->rdma_sge.vaddr = NULL; | |
1909 | e->rdma_sge.length = 0; | |
1910 | e->rdma_sge.sge_length = 0; | |
1911 | } | |
1912 | e->psn = psn; | |
1913 | if (old_req) | |
1914 | goto unlock_done; | |
1915 | qp->s_tail_ack_queue = prev; | |
1916 | break; | |
1917 | } | |
1918 | ||
1919 | case OP(COMPARE_SWAP): | |
1920 | case OP(FETCH_ADD): { | |
1921 | /* | |
1922 | * If we didn't find the atomic request in the ack queue | |
ca00c62b | 1923 | * or the send engine is already backed up to send an |
77241056 MM |
1924 | * earlier entry, we can ignore this request. |
1925 | */ | |
50e5dcbe | 1926 | if (!e || e->opcode != (u8)opcode || old_req) |
77241056 MM |
1927 | goto unlock_done; |
1928 | qp->s_tail_ack_queue = prev; | |
1929 | break; | |
1930 | } | |
1931 | ||
1932 | default: | |
1933 | /* | |
1934 | * Ignore this operation if it doesn't request an ACK | |
1935 | * or an earlier RDMA read or atomic is going to be resent. | |
1936 | */ | |
1937 | if (!(psn & IB_BTH_REQ_ACK) || old_req) | |
1938 | goto unlock_done; | |
1939 | /* | |
1940 | * Resend the most recent ACK if this request is | |
1941 | * after all the previous RDMA reads and atomics. | |
1942 | */ | |
1943 | if (i == qp->r_head_ack_queue) { | |
1944 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
1945 | qp->r_nak_state = 0; | |
1946 | qp->r_ack_psn = qp->r_psn - 1; | |
1947 | goto send_ack; | |
1948 | } | |
1949 | ||
1950 | /* | |
1951 | * Resend the RDMA read or atomic op which | |
1952 | * ACKs this duplicate request. | |
1953 | */ | |
1954 | qp->s_tail_ack_queue = i; | |
1955 | break; | |
1956 | } | |
1957 | qp->s_ack_state = OP(ACKNOWLEDGE); | |
54d10c1e | 1958 | qp->s_flags |= RVT_S_RESP_PENDING; |
77241056 MM |
1959 | qp->r_nak_state = 0; |
1960 | hfi1_schedule_send(qp); | |
1961 | ||
1962 | unlock_done: | |
1963 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
1964 | done: | |
1965 | return 1; | |
1966 | ||
1967 | send_ack: | |
1968 | return 0; | |
1969 | } | |
1970 | ||
895420dd | 1971 | static inline void update_ack_queue(struct rvt_qp *qp, unsigned n) |
77241056 MM |
1972 | { |
1973 | unsigned next; | |
1974 | ||
1975 | next = n + 1; | |
ddf922c3 | 1976 | if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) |
77241056 MM |
1977 | next = 0; |
1978 | qp->s_tail_ack_queue = next; | |
1979 | qp->s_ack_state = OP(ACKNOWLEDGE); | |
1980 | } | |
1981 | ||
/*
 * Record a congestion-control event for service level @sl in the
 * per-port circular event log (ppd->cc_events, OPA_CONG_LOG_ELEMS
 * entries) and bump the per-SL threshold bitmap and event counter.
 * The log is protected by ppd->cc_log_lock.
 */
static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
			  u32 lqpn, u32 rqpn, u8 svc_type)
{
	struct opa_hfi1_cong_log_event_internal *cc_event;
	unsigned long flags;

	/* ignore out-of-range service levels */
	if (sl >= OPA_MAX_SLS)
		return;

	spin_lock_irqsave(&ppd->cc_log_lock, flags);

	/* one bit per SL, packed 8 SLs per byte */
	ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
	ppd->threshold_event_counter++;

	/* claim the next ring slot, wrapping at OPA_CONG_LOG_ELEMS */
	cc_event = &ppd->cc_events[ppd->cc_log_idx++];
	if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
		ppd->cc_log_idx = 0;
	cc_event->lqpn = lqpn & RVT_QPN_MASK;
	cc_event->rqpn = rqpn & RVT_QPN_MASK;
	cc_event->sl = sl;
	cc_event->svc_type = svc_type;
	cc_event->rlid = rlid;
	/* keep timestamp in units of 1.024 usec */
	cc_event->timestamp = ktime_get_ns() / 1024;

	spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
}
2009 | ||
/*
 * process_becn - react to a congestion notification for service level @sl
 *
 * Raises the CCTI for this SL (clamped to the configured ccti_limit),
 * reselects the inter-packet gap via set_link_ipg(), and arms the
 * per-SL CCA hrtimer if it is not already running.  When the CCTI
 * reaches the configured trigger threshold, the event is recorded via
 * log_cca_event().
 *
 * NOTE(review): "becn" presumably stands for backward explicit
 * congestion notification — confirm against the callers.
 */
void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
		  u32 rqpn, u8 svc_type)
{
	struct cca_timer *cca_timer;
	u16 ccti, ccti_incr, ccti_timer, ccti_limit;
	u8 trigger_threshold;
	struct cc_state *cc_state;
	unsigned long flags;

	/* ignore out-of-range service levels */
	if (sl >= OPA_MAX_SLS)
		return;

	cc_state = get_cc_state(ppd);

	/* no congestion-control state configured for this port */
	if (!cc_state)
		return;

	/*
	 * 1) increase CCTI (for this SL)
	 * 2) select IPG (i.e., call set_link_ipg())
	 * 3) start timer
	 */
	ccti_limit = cc_state->cct.ccti_limit;
	ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase;
	ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
	trigger_threshold =
		cc_state->cong_setting.entries[sl].trigger_threshold;

	spin_lock_irqsave(&ppd->cca_timer_lock, flags);

	cca_timer = &ppd->cca_timer[sl];
	if (cca_timer->ccti < ccti_limit) {
		/* bump CCTI by the configured increment, clamped to the limit */
		if (cca_timer->ccti + ccti_incr <= ccti_limit)
			cca_timer->ccti += ccti_incr;
		else
			cca_timer->ccti = ccti_limit;
		set_link_ipg(ppd);
	}

	/* snapshot under the lock for the threshold check below */
	ccti = cca_timer->ccti;

	if (!hrtimer_active(&cca_timer->hrtimer)) {
		/* ccti_timer is in units of 1.024 usec */
		unsigned long nsec = 1024 * ccti_timer;

		hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
			      HRTIMER_MODE_REL_PINNED);
	}

	spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);

	/* log outside the lock; log_cca_event() takes its own lock */
	if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
		log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
2064 | ||
2065 | /** | |
2066 | * hfi1_rc_rcv - process an incoming RC packet | |
5b6cabb0 | 2067 | * @packet: data packet information |
77241056 MM |
2068 | * |
2069 | * This is called from qp_rcv() to process an incoming RC packet | |
2070 | * for the given QP. | |
b77d713a | 2071 | * May be called at interrupt level. |
77241056 MM |
2072 | */ |
2073 | void hfi1_rc_rcv(struct hfi1_packet *packet) | |
2074 | { | |
2075 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
72c07e2b | 2076 | void *data = packet->payload; |
77241056 | 2077 | u32 tlen = packet->tlen; |
895420dd | 2078 | struct rvt_qp *qp = packet->qp; |
f3e862cb | 2079 | struct hfi1_ibport *ibp = rcd_to_iport(rcd); |
261a4351 | 2080 | struct ib_other_headers *ohdr = packet->ohdr; |
9039746c | 2081 | u32 opcode = packet->opcode; |
77241056 | 2082 | u32 hdrsize = packet->hlen; |
5b6cabb0 | 2083 | u32 psn = ib_bth_get_psn(packet->ohdr); |
9039746c | 2084 | u32 pad = packet->pad; |
77241056 MM |
2085 | struct ib_wc wc; |
2086 | u32 pmtu = qp->pmtu; | |
2087 | int diff; | |
2088 | struct ib_reth *reth; | |
2089 | unsigned long flags; | |
4608e4c8 | 2090 | int ret; |
fe4dd423 | 2091 | bool copy_last = false, fecn; |
a2df0c83 | 2092 | u32 rkey; |
5b6cabb0 | 2093 | u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); |
77241056 | 2094 | |
68e78b3d | 2095 | lockdep_assert_held(&qp->r_lock); |
9039746c | 2096 | |
9039746c | 2097 | if (hfi1_ruc_check_hdr(ibp, packet)) |
77241056 MM |
2098 | return; |
2099 | ||
fe4dd423 | 2100 | fecn = process_ecn(qp, packet); |
48a615dc | 2101 | opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1])); |
77241056 MM |
2102 | |
2103 | /* | |
2104 | * Process responses (ACKs) before anything else. Note that the | |
2105 | * packet sequence number will be for something in the send work | |
2106 | * queue rather than the expected receive packet sequence number. | |
2107 | * In other words, this QP is the requester. | |
2108 | */ | |
2109 | if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && | |
2110 | opcode <= OP(ATOMIC_ACKNOWLEDGE)) { | |
5b6cabb0 | 2111 | rc_rcv_resp(packet); |
77241056 MM |
2112 | return; |
2113 | } | |
2114 | ||
2115 | /* Compute 24 bits worth of difference. */ | |
2116 | diff = delta_psn(psn, qp->r_psn); | |
2117 | if (unlikely(diff)) { | |
2118 | if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd)) | |
2119 | return; | |
2120 | goto send_ack; | |
2121 | } | |
2122 | ||
2123 | /* Check for opcode sequence errors. */ | |
2124 | switch (qp->r_state) { | |
2125 | case OP(SEND_FIRST): | |
2126 | case OP(SEND_MIDDLE): | |
2127 | if (opcode == OP(SEND_MIDDLE) || | |
2128 | opcode == OP(SEND_LAST) || | |
a2df0c83 JX |
2129 | opcode == OP(SEND_LAST_WITH_IMMEDIATE) || |
2130 | opcode == OP(SEND_LAST_WITH_INVALIDATE)) | |
77241056 MM |
2131 | break; |
2132 | goto nack_inv; | |
2133 | ||
2134 | case OP(RDMA_WRITE_FIRST): | |
2135 | case OP(RDMA_WRITE_MIDDLE): | |
2136 | if (opcode == OP(RDMA_WRITE_MIDDLE) || | |
2137 | opcode == OP(RDMA_WRITE_LAST) || | |
2138 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | |
2139 | break; | |
2140 | goto nack_inv; | |
2141 | ||
2142 | default: | |
2143 | if (opcode == OP(SEND_MIDDLE) || | |
2144 | opcode == OP(SEND_LAST) || | |
2145 | opcode == OP(SEND_LAST_WITH_IMMEDIATE) || | |
a2df0c83 | 2146 | opcode == OP(SEND_LAST_WITH_INVALIDATE) || |
77241056 MM |
2147 | opcode == OP(RDMA_WRITE_MIDDLE) || |
2148 | opcode == OP(RDMA_WRITE_LAST) || | |
2149 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | |
2150 | goto nack_inv; | |
2151 | /* | |
2152 | * Note that it is up to the requester to not send a new | |
2153 | * RDMA read or atomic operation before receiving an ACK | |
2154 | * for the previous operation. | |
2155 | */ | |
2156 | break; | |
2157 | } | |
2158 | ||
54d10c1e | 2159 | if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) |
beb5a042 | 2160 | rvt_comm_est(qp); |
77241056 MM |
2161 | |
2162 | /* OK, process the packet. */ | |
2163 | switch (opcode) { | |
2164 | case OP(SEND_FIRST): | |
832369fa | 2165 | ret = rvt_get_rwqe(qp, false); |
77241056 MM |
2166 | if (ret < 0) |
2167 | goto nack_op_err; | |
2168 | if (!ret) | |
2169 | goto rnr_nak; | |
2170 | qp->r_rcv_len = 0; | |
2171 | /* FALLTHROUGH */ | |
2172 | case OP(SEND_MIDDLE): | |
2173 | case OP(RDMA_WRITE_MIDDLE): | |
2174 | send_middle: | |
2175 | /* Check for invalid length PMTU or posted rwqe len. */ | |
5b6cabb0 DH |
2176 | /* |
2177 | * There will be no padding for 9B packet but 16B packets | |
2178 | * will come in with some padding since we always add | |
2179 | * CRC and LT bytes which will need to be flit aligned | |
2180 | */ | |
2181 | if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) | |
77241056 MM |
2182 | goto nack_inv; |
2183 | qp->r_rcv_len += pmtu; | |
2184 | if (unlikely(qp->r_rcv_len > qp->r_len)) | |
2185 | goto nack_inv; | |
019f118b | 2186 | rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); |
77241056 MM |
2187 | break; |
2188 | ||
2189 | case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): | |
2190 | /* consume RWQE */ | |
832369fa | 2191 | ret = rvt_get_rwqe(qp, true); |
77241056 MM |
2192 | if (ret < 0) |
2193 | goto nack_op_err; | |
2194 | if (!ret) | |
2195 | goto rnr_nak; | |
2196 | goto send_last_imm; | |
2197 | ||
2198 | case OP(SEND_ONLY): | |
2199 | case OP(SEND_ONLY_WITH_IMMEDIATE): | |
a2df0c83 | 2200 | case OP(SEND_ONLY_WITH_INVALIDATE): |
832369fa | 2201 | ret = rvt_get_rwqe(qp, false); |
77241056 MM |
2202 | if (ret < 0) |
2203 | goto nack_op_err; | |
2204 | if (!ret) | |
2205 | goto rnr_nak; | |
2206 | qp->r_rcv_len = 0; | |
2207 | if (opcode == OP(SEND_ONLY)) | |
2208 | goto no_immediate_data; | |
a2df0c83 JX |
2209 | if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) |
2210 | goto send_last_inv; | |
6ffeb21f | 2211 | /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ |
77241056 MM |
2212 | case OP(SEND_LAST_WITH_IMMEDIATE): |
2213 | send_last_imm: | |
2214 | wc.ex.imm_data = ohdr->u.imm_data; | |
2215 | wc.wc_flags = IB_WC_WITH_IMM; | |
2216 | goto send_last; | |
a2df0c83 JX |
2217 | case OP(SEND_LAST_WITH_INVALIDATE): |
2218 | send_last_inv: | |
2219 | rkey = be32_to_cpu(ohdr->u.ieth); | |
2220 | if (rvt_invalidate_rkey(qp, rkey)) | |
2221 | goto no_immediate_data; | |
2222 | wc.ex.invalidate_rkey = rkey; | |
2223 | wc.wc_flags = IB_WC_WITH_INVALIDATE; | |
2224 | goto send_last; | |
77241056 | 2225 | case OP(RDMA_WRITE_LAST): |
0128fcea | 2226 | copy_last = rvt_is_user_qp(qp); |
7b0b01aa DL |
2227 | /* fall through */ |
2228 | case OP(SEND_LAST): | |
77241056 MM |
2229 | no_immediate_data: |
2230 | wc.wc_flags = 0; | |
2231 | wc.ex.imm_data = 0; | |
2232 | send_last: | |
77241056 MM |
2233 | /* Check for invalid length. */ |
2234 | /* LAST len should be >= 1 */ | |
5b6cabb0 | 2235 | if (unlikely(tlen < (hdrsize + extra_bytes))) |
77241056 | 2236 | goto nack_inv; |
5b6cabb0 DH |
2237 | /* Don't count the CRC(and padding and LT byte for 16B). */ |
2238 | tlen -= (hdrsize + extra_bytes); | |
77241056 MM |
2239 | wc.byte_len = tlen + qp->r_rcv_len; |
2240 | if (unlikely(wc.byte_len > qp->r_len)) | |
2241 | goto nack_inv; | |
019f118b | 2242 | rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last); |
ec4274f1 | 2243 | rvt_put_ss(&qp->r_sge); |
77241056 | 2244 | qp->r_msn++; |
53e91d26 | 2245 | if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) |
77241056 MM |
2246 | break; |
2247 | wc.wr_id = qp->r_wr_id; | |
2248 | wc.status = IB_WC_SUCCESS; | |
2249 | if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || | |
2250 | opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) | |
2251 | wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; | |
2252 | else | |
2253 | wc.opcode = IB_WC_RECV; | |
2254 | wc.qp = &qp->ibqp; | |
2255 | wc.src_qp = qp->remote_qpn; | |
b64581ad | 2256 | wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; |
77241056 MM |
2257 | /* |
2258 | * It seems that IB mandates the presence of an SL in a | |
2259 | * work completion only for the UD transport (see section | |
2260 | * 11.4.2 of IBTA Vol. 1). | |
2261 | * | |
2262 | * However, the way the SL is chosen below is consistent | |
2263 | * with the way that IB/qib works and is trying avoid | |
2264 | * introducing incompatibilities. | |
2265 | * | |
2266 | * See also OPA Vol. 1, section 9.7.6, and table 9-17. | |
2267 | */ | |
d8966fcd | 2268 | wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); |
77241056 MM |
2269 | /* zero fields that are N/A */ |
2270 | wc.vendor_err = 0; | |
2271 | wc.pkey_index = 0; | |
2272 | wc.dlid_path_bits = 0; | |
2273 | wc.port_num = 0; | |
2274 | /* Signal completion event if the solicited bit is set. */ | |
abd712da | 2275 | rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, |
f150e273 | 2276 | ib_bth_is_solicited(ohdr)); |
77241056 MM |
2277 | break; |
2278 | ||
77241056 | 2279 | case OP(RDMA_WRITE_ONLY): |
0128fcea | 2280 | copy_last = rvt_is_user_qp(qp); |
7b0b01aa DL |
2281 | /* fall through */ |
2282 | case OP(RDMA_WRITE_FIRST): | |
77241056 MM |
2283 | case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): |
2284 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) | |
2285 | goto nack_inv; | |
2286 | /* consume RWQE */ | |
2287 | reth = &ohdr->u.rc.reth; | |
2288 | qp->r_len = be32_to_cpu(reth->length); | |
2289 | qp->r_rcv_len = 0; | |
2290 | qp->r_sge.sg_list = NULL; | |
2291 | if (qp->r_len != 0) { | |
2292 | u32 rkey = be32_to_cpu(reth->rkey); | |
261a4351 | 2293 | u64 vaddr = get_ib_reth_vaddr(reth); |
77241056 MM |
2294 | int ok; |
2295 | ||
2296 | /* Check rkey & NAK */ | |
895420dd DD |
2297 | ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, |
2298 | rkey, IB_ACCESS_REMOTE_WRITE); | |
77241056 MM |
2299 | if (unlikely(!ok)) |
2300 | goto nack_acc; | |
2301 | qp->r_sge.num_sge = 1; | |
2302 | } else { | |
2303 | qp->r_sge.num_sge = 0; | |
2304 | qp->r_sge.sge.mr = NULL; | |
2305 | qp->r_sge.sge.vaddr = NULL; | |
2306 | qp->r_sge.sge.length = 0; | |
2307 | qp->r_sge.sge.sge_length = 0; | |
2308 | } | |
2309 | if (opcode == OP(RDMA_WRITE_FIRST)) | |
2310 | goto send_middle; | |
2311 | else if (opcode == OP(RDMA_WRITE_ONLY)) | |
2312 | goto no_immediate_data; | |
832369fa | 2313 | ret = rvt_get_rwqe(qp, true); |
77241056 MM |
2314 | if (ret < 0) |
2315 | goto nack_op_err; | |
1feb4006 MM |
2316 | if (!ret) { |
2317 | /* peer will send again */ | |
2318 | rvt_put_ss(&qp->r_sge); | |
77241056 | 2319 | goto rnr_nak; |
1feb4006 | 2320 | } |
77241056 MM |
2321 | wc.ex.imm_data = ohdr->u.rc.imm_data; |
2322 | wc.wc_flags = IB_WC_WITH_IMM; | |
2323 | goto send_last; | |
2324 | ||
2325 | case OP(RDMA_READ_REQUEST): { | |
895420dd | 2326 | struct rvt_ack_entry *e; |
77241056 MM |
2327 | u32 len; |
2328 | u8 next; | |
2329 | ||
2330 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) | |
2331 | goto nack_inv; | |
2332 | next = qp->r_head_ack_queue + 1; | |
ddf922c3 KW |
2333 | /* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */ |
2334 | if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) | |
77241056 MM |
2335 | next = 0; |
2336 | spin_lock_irqsave(&qp->s_lock, flags); | |
2337 | if (unlikely(next == qp->s_tail_ack_queue)) { | |
2338 | if (!qp->s_ack_queue[next].sent) | |
2339 | goto nack_inv_unlck; | |
2340 | update_ack_queue(qp, next); | |
2341 | } | |
2342 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; | |
2343 | if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { | |
895420dd | 2344 | rvt_put_mr(e->rdma_sge.mr); |
77241056 MM |
2345 | e->rdma_sge.mr = NULL; |
2346 | } | |
2347 | reth = &ohdr->u.rc.reth; | |
2348 | len = be32_to_cpu(reth->length); | |
2349 | if (len) { | |
2350 | u32 rkey = be32_to_cpu(reth->rkey); | |
261a4351 | 2351 | u64 vaddr = get_ib_reth_vaddr(reth); |
77241056 MM |
2352 | int ok; |
2353 | ||
2354 | /* Check rkey & NAK */ | |
895420dd DD |
2355 | ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, |
2356 | rkey, IB_ACCESS_REMOTE_READ); | |
77241056 MM |
2357 | if (unlikely(!ok)) |
2358 | goto nack_acc_unlck; | |
2359 | /* | |
2360 | * Update the next expected PSN. We add 1 later | |
2361 | * below, so only add the remainder here. | |
2362 | */ | |
5dc80605 | 2363 | qp->r_psn += rvt_div_mtu(qp, len - 1); |
77241056 MM |
2364 | } else { |
2365 | e->rdma_sge.mr = NULL; | |
2366 | e->rdma_sge.vaddr = NULL; | |
2367 | e->rdma_sge.length = 0; | |
2368 | e->rdma_sge.sge_length = 0; | |
2369 | } | |
2370 | e->opcode = opcode; | |
2371 | e->sent = 0; | |
2372 | e->psn = psn; | |
2373 | e->lpsn = qp->r_psn; | |
2374 | /* | |
2375 | * We need to increment the MSN here instead of when we | |
2376 | * finish sending the result since a duplicate request would | |
2377 | * increment it more than once. | |
2378 | */ | |
2379 | qp->r_msn++; | |
2380 | qp->r_psn++; | |
2381 | qp->r_state = opcode; | |
2382 | qp->r_nak_state = 0; | |
2383 | qp->r_head_ack_queue = next; | |
2384 | ||
ca00c62b | 2385 | /* Schedule the send engine. */ |
54d10c1e | 2386 | qp->s_flags |= RVT_S_RESP_PENDING; |
fe4dd423 MH |
2387 | if (fecn) |
2388 | qp->s_flags |= RVT_S_ECN; | |
77241056 MM |
2389 | hfi1_schedule_send(qp); |
2390 | ||
2391 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
77241056 MM |
2392 | return; |
2393 | } | |
2394 | ||
2395 | case OP(COMPARE_SWAP): | |
2396 | case OP(FETCH_ADD): { | |
48a615dc KW |
2397 | struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth; |
2398 | u64 vaddr = get_ib_ateth_vaddr(ateth); | |
2399 | bool opfn = opcode == OP(COMPARE_SWAP) && | |
2400 | vaddr == HFI1_VERBS_E_ATOMIC_VADDR; | |
895420dd | 2401 | struct rvt_ack_entry *e; |
77241056 MM |
2402 | atomic64_t *maddr; |
2403 | u64 sdata; | |
2404 | u32 rkey; | |
2405 | u8 next; | |
2406 | ||
48a615dc KW |
2407 | if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) && |
2408 | !opfn)) | |
77241056 MM |
2409 | goto nack_inv; |
2410 | next = qp->r_head_ack_queue + 1; | |
ddf922c3 | 2411 | if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device))) |
77241056 MM |
2412 | next = 0; |
2413 | spin_lock_irqsave(&qp->s_lock, flags); | |
2414 | if (unlikely(next == qp->s_tail_ack_queue)) { | |
2415 | if (!qp->s_ack_queue[next].sent) | |
2416 | goto nack_inv_unlck; | |
2417 | update_ack_queue(qp, next); | |
2418 | } | |
2419 | e = &qp->s_ack_queue[qp->r_head_ack_queue]; | |
2420 | if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { | |
895420dd | 2421 | rvt_put_mr(e->rdma_sge.mr); |
77241056 MM |
2422 | e->rdma_sge.mr = NULL; |
2423 | } | |
48a615dc KW |
2424 | /* Process OPFN special virtual address */ |
2425 | if (opfn) { | |
2426 | opfn_conn_response(qp, e, ateth); | |
2427 | goto ack; | |
2428 | } | |
77241056 MM |
2429 | if (unlikely(vaddr & (sizeof(u64) - 1))) |
2430 | goto nack_inv_unlck; | |
2431 | rkey = be32_to_cpu(ateth->rkey); | |
2432 | /* Check rkey & NAK */ | |
895420dd DD |
2433 | if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), |
2434 | vaddr, rkey, | |
2435 | IB_ACCESS_REMOTE_ATOMIC))) | |
77241056 MM |
2436 | goto nack_acc_unlck; |
2437 | /* Perform atomic OP and save result. */ | |
50e5dcbe | 2438 | maddr = (atomic64_t *)qp->r_sge.sge.vaddr; |
261a4351 | 2439 | sdata = get_ib_ateth_swap(ateth); |
77241056 | 2440 | e->atomic_data = (opcode == OP(FETCH_ADD)) ? |
50e5dcbe JJ |
2441 | (u64)atomic64_add_return(sdata, maddr) - sdata : |
2442 | (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, | |
261a4351 | 2443 | get_ib_ateth_compare(ateth), |
77241056 | 2444 | sdata); |
895420dd | 2445 | rvt_put_mr(qp->r_sge.sge.mr); |
77241056 | 2446 | qp->r_sge.num_sge = 0; |
48a615dc | 2447 | ack: |
77241056 MM |
2448 | e->opcode = opcode; |
2449 | e->sent = 0; | |
2450 | e->psn = psn; | |
2451 | e->lpsn = psn; | |
2452 | qp->r_msn++; | |
2453 | qp->r_psn++; | |
2454 | qp->r_state = opcode; | |
2455 | qp->r_nak_state = 0; | |
2456 | qp->r_head_ack_queue = next; | |
2457 | ||
ca00c62b | 2458 | /* Schedule the send engine. */ |
54d10c1e | 2459 | qp->s_flags |= RVT_S_RESP_PENDING; |
fe4dd423 MH |
2460 | if (fecn) |
2461 | qp->s_flags |= RVT_S_ECN; | |
77241056 MM |
2462 | hfi1_schedule_send(qp); |
2463 | ||
2464 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
77241056 MM |
2465 | return; |
2466 | } | |
2467 | ||
2468 | default: | |
2469 | /* NAK unknown opcodes. */ | |
2470 | goto nack_inv; | |
2471 | } | |
2472 | qp->r_psn++; | |
2473 | qp->r_state = opcode; | |
2474 | qp->r_ack_psn = psn; | |
2475 | qp->r_nak_state = 0; | |
2476 | /* Send an ACK if requested or required. */ | |
fe4dd423 MH |
2477 | if (psn & IB_BTH_REQ_ACK || fecn) { |
2478 | if (packet->numpkt == 0 || fecn || | |
2479 | qp->r_adefered >= HFI1_PSN_CREDIT) { | |
7c091e5c MM |
2480 | rc_cancel_ack(qp); |
2481 | goto send_ack; | |
2482 | } | |
688f21c0 | 2483 | qp->r_adefered++; |
7c091e5c MM |
2484 | rc_defered_ack(rcd, qp); |
2485 | } | |
77241056 MM |
2486 | return; |
2487 | ||
2488 | rnr_nak: | |
bf640096 | 2489 | qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK; |
77241056 MM |
2490 | qp->r_ack_psn = qp->r_psn; |
2491 | /* Queue RNR NAK for later */ | |
2fd36865 | 2492 | rc_defered_ack(rcd, qp); |
77241056 MM |
2493 | return; |
2494 | ||
2495 | nack_op_err: | |
beb5a042 | 2496 | rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); |
77241056 MM |
2497 | qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR; |
2498 | qp->r_ack_psn = qp->r_psn; | |
2499 | /* Queue NAK for later */ | |
2fd36865 | 2500 | rc_defered_ack(rcd, qp); |
77241056 MM |
2501 | return; |
2502 | ||
2503 | nack_inv_unlck: | |
2504 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
2505 | nack_inv: | |
beb5a042 | 2506 | rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); |
77241056 MM |
2507 | qp->r_nak_state = IB_NAK_INVALID_REQUEST; |
2508 | qp->r_ack_psn = qp->r_psn; | |
2509 | /* Queue NAK for later */ | |
2fd36865 | 2510 | rc_defered_ack(rcd, qp); |
77241056 MM |
2511 | return; |
2512 | ||
2513 | nack_acc_unlck: | |
2514 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
2515 | nack_acc: | |
beb5a042 | 2516 | rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); |
77241056 MM |
2517 | qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; |
2518 | qp->r_ack_psn = qp->r_psn; | |
2519 | send_ack: | |
fe4dd423 | 2520 | hfi1_send_rc_ack(packet, fecn); |
77241056 MM |
2521 | } |
2522 | ||
2523 | void hfi1_rc_hdrerr( | |
2524 | struct hfi1_ctxtdata *rcd, | |
9039746c | 2525 | struct hfi1_packet *packet, |
895420dd | 2526 | struct rvt_qp *qp) |
77241056 | 2527 | { |
f3e862cb | 2528 | struct hfi1_ibport *ibp = rcd_to_iport(rcd); |
77241056 | 2529 | int diff; |
49c32037 | 2530 | u32 opcode; |
9039746c | 2531 | u32 psn; |
77241056 | 2532 | |
9039746c | 2533 | if (hfi1_ruc_check_hdr(ibp, packet)) |
77241056 MM |
2534 | return; |
2535 | ||
9039746c DH |
2536 | psn = ib_bth_get_psn(packet->ohdr); |
2537 | opcode = ib_bth_get_opcode(packet->ohdr); | |
77241056 MM |
2538 | |
2539 | /* Only deal with RDMA Writes for now */ | |
2540 | if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { | |
2541 | diff = delta_psn(psn, qp->r_psn); | |
2542 | if (!qp->r_nak_state && diff >= 0) { | |
4eb06882 | 2543 | ibp->rvp.n_rc_seqnak++; |
77241056 MM |
2544 | qp->r_nak_state = IB_NAK_PSN_ERROR; |
2545 | /* Use the expected PSN. */ | |
2546 | qp->r_ack_psn = qp->r_psn; | |
2547 | /* | |
2548 | * Wait to send the sequence | |
2549 | * NAK until all packets | |
2550 | * in the receive queue have | |
2551 | * been processed. | |
2552 | * Otherwise, we end up | |
2553 | * propagating congestion. | |
2554 | */ | |
2fd36865 | 2555 | rc_defered_ack(rcd, qp); |
77241056 MM |
2556 | } /* Out of sequence NAK */ |
2557 | } /* QP Request NAKs */ | |
2558 | } |