Commit | Line | Data |
---|---|---|
77241056 MM |
1 | /* |
2 | * | |
3 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
4 | * redistributing this file, you may do so under either license. | |
5 | * | |
6 | * GPL LICENSE SUMMARY | |
7 | * | |
8 | * Copyright(c) 2015 Intel Corporation. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of version 2 of the GNU General Public License as | |
12 | * published by the Free Software Foundation. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, but | |
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * General Public License for more details. | |
18 | * | |
19 | * BSD LICENSE | |
20 | * | |
21 | * Copyright(c) 2015 Intel Corporation. | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
51 | #include <rdma/ib_mad.h> | |
52 | #include <rdma/ib_user_verbs.h> | |
53 | #include <linux/io.h> | |
54 | #include <linux/module.h> | |
55 | #include <linux/utsname.h> | |
56 | #include <linux/rculist.h> | |
57 | #include <linux/mm.h> | |
58 | #include <linux/random.h> | |
59 | #include <linux/vmalloc.h> | |
60 | ||
61 | #include "hfi.h" | |
62 | #include "common.h" | |
63 | #include "device.h" | |
64 | #include "trace.h" | |
65 | #include "qp.h" | |
45842abb | 66 | #include "verbs_txreq.h" |
77241056 | 67 | |
/*
 * Module parameters: capacity limits advertised to the IB core.
 * All are read-only at runtime (S_IRUGO); set them at module load time.
 */
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 16384;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");

/*
 * NOTE(review): appears to be a packet-size threshold selecting PIO vs.
 * SDMA on the send path — confirm against the send-engine callers.
 */
unsigned short piothreshold;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
130 | ||
77241056 MM |
131 | static void verbs_sdma_complete( |
132 | struct sdma_txreq *cookie, | |
a545f530 | 133 | int status); |
77241056 | 134 | |
14553ca1 MM |
135 | static int pio_wait(struct rvt_qp *qp, |
136 | struct send_context *sc, | |
137 | struct hfi1_pkt_state *ps, | |
138 | u32 flag); | |
139 | ||
64ffd86c JJ |
140 | /* Length of buffer to create verbs txreq cache name */ |
141 | #define TXREQ_NAME_LEN 24 | |
142 | ||
77241056 MM |
143 | /* |
144 | * Translate ib_wr_opcode into ib_wc_opcode. | |
145 | */ | |
146 | const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { | |
147 | [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, | |
148 | [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, | |
149 | [IB_WR_SEND] = IB_WC_SEND, | |
150 | [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, | |
151 | [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, | |
152 | [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, | |
153 | [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD | |
154 | }; | |
155 | ||
/*
 * Length of header by opcode, 0 --> not supported
 *
 * Values are bytes: 12 (BTH) + 8 (LRH) plus any opcode-specific extended
 * transport headers (e.g. 16 for RETH, 4 for immediate/AETH, 28 for
 * atomics, 8 for DETH) — see the IBTA spec for per-opcode layouts.
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST]                     = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE]                    = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST]                      = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY]                      = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST]               = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE]              = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST]                = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY]                = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST]              = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST]       = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE]      = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE]                    = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4,
	[IB_OPCODE_RC_COMPARE_SWAP]                   = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD]                      = 12 + 8 + 28,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST]                     = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE]                    = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST]                      = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY]                      = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST]               = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE]              = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST]                = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY]                = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY]                      = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE]       = 12 + 8 + 12
};
199 | ||
/*
 * Receive dispatch table, indexed by BTH opcode.
 *
 * NOTE(review): hfi1_ib_rcv() does not NULL-check an entry before calling
 * it; qp_ok() is assumed to admit only opcodes present here — confirm.
 */
static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST]                     = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE]                    = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST]                      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY]                      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST]               = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE]              = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST]                = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY]                = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST]              = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST]       = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE]      = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE]                    = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP]                   = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD]                      = &hfi1_rc_rcv,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST]                     = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE]                    = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST]                      = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE]       = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY]                      = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST]               = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE]              = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST]                = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY]                = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY]                      = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE]       = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP]                               = &hfi1_cnp_rcv
};
242 | ||
243 | /* | |
244 | * System image GUID. | |
245 | */ | |
246 | __be64 ib_hfi1_sys_image_guid; | |
247 | ||
/**
 * hfi1_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 * @release: if non-zero, drop the MR reference of each fully consumed SGE
 * @copy_last: do a separate copy of the last 8 bytes
 */
void hfi1_copy_sge(
	struct rvt_sge_state *ss,
	void *data, u32 length,
	int release,
	int copy_last)
{
	struct rvt_sge *sge = &ss->sge;
	int in_last = 0;
	int i;

	/*
	 * When copy_last is set, hold back the final 8 bytes so they can be
	 * copied byte-by-byte in a second pass (the "again" loop below).  If
	 * the message is 8 bytes or shorter, the whole copy is byte-by-byte.
	 */
	if (copy_last) {
		if (length > 8) {
			length -= 8;
		} else {
			copy_last = 0;
			in_last = 1;
		}
	}

again:
	while (length) {
		u32 len = sge->length;

		/* clamp to both the remaining request and the current SGE */
		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		if (in_last) {
			/* enforce byte transfer ordering */
			for (i = 0; i < len; i++)
				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
		} else {
			memcpy(sge->vaddr, data, len);
		}
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			/* SGE exhausted: optionally release, then advance */
			if (release)
				rvt_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			/* step to the next segment within the same region */
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}

	/* second pass: copy the held-back last 8 bytes byte-by-byte */
	if (copy_last) {
		copy_last = 0;
		in_last = 1;
		length = 8;
		goto again;
	}
}
320 | ||
321 | /** | |
322 | * hfi1_skip_sge - skip over SGE memory | |
323 | * @ss: the SGE state | |
324 | * @length: the number of bytes to skip | |
325 | */ | |
895420dd | 326 | void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release) |
77241056 | 327 | { |
895420dd | 328 | struct rvt_sge *sge = &ss->sge; |
77241056 MM |
329 | |
330 | while (length) { | |
331 | u32 len = sge->length; | |
332 | ||
333 | if (len > length) | |
334 | len = length; | |
335 | if (len > sge->sge_length) | |
336 | len = sge->sge_length; | |
337 | WARN_ON_ONCE(len == 0); | |
338 | sge->vaddr += len; | |
339 | sge->length -= len; | |
340 | sge->sge_length -= len; | |
341 | if (sge->sge_length == 0) { | |
342 | if (release) | |
895420dd | 343 | rvt_put_mr(sge->mr); |
77241056 MM |
344 | if (--ss->num_sge) |
345 | *sge = *ss->sg_list++; | |
346 | } else if (sge->length == 0 && sge->mr->lkey) { | |
cd4ceee3 | 347 | if (++sge->n >= RVT_SEGSZ) { |
77241056 MM |
348 | if (++sge->m >= sge->mr->mapsz) |
349 | break; | |
350 | sge->n = 0; | |
351 | } | |
352 | sge->vaddr = | |
353 | sge->mr->map[sge->m]->segs[sge->n].vaddr; | |
354 | sge->length = | |
355 | sge->mr->map[sge->m]->segs[sge->n].length; | |
356 | } | |
357 | length -= len; | |
358 | } | |
359 | } | |
360 | ||
77241056 MM |
361 | /* |
362 | * Make sure the QP is ready and able to accept the given opcode. | |
363 | */ | |
364 | static inline int qp_ok(int opcode, struct hfi1_packet *packet) | |
365 | { | |
366 | struct hfi1_ibport *ibp; | |
367 | ||
83693bd1 | 368 | if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) |
77241056 MM |
369 | goto dropit; |
370 | if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) || | |
371 | (opcode == IB_OPCODE_CNP)) | |
372 | return 1; | |
373 | dropit: | |
374 | ibp = &packet->rcd->ppd->ibport_data; | |
4eb06882 | 375 | ibp->rvp.n_pkt_drops++; |
77241056 MM |
376 | return 0; |
377 | } | |
378 | ||
379 | ||
/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 *
 * Tlen is the length of the header + data + CRC in bytes.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_ib_header *hdr = packet->hdr;
	u32 tlen = packet->tlen;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	unsigned long flags;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_BTH)
		packet->ohdr = &hdr->u.oth;
	else if (lnh == HFI1_LRH_GRH) {
		u32 vtf;

		packet->ohdr = &hdr->u.l.oth;
		if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
		packet->rcv_flags |= HFI1_HAS_GRH;
	} else
		goto drop;

	trace_input_ibhdr(rcd->dd, hdr);

	/* BTH opcode lives in the top byte of bth[0] */
	opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
	inc_opstats(tlen, &rcd->opstats->stats[opcode]);

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
	lid = be16_to_cpu(hdr->lrh[1]);
	if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
		     (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
		/* multicast LID: deliver to every QP attached to the group */
		struct rvt_mcast *mcast;
		struct rvt_mcast_qp *p;

		if (lnh != HFI1_LRH_GRH)
			goto drop;
		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
		if (mcast == NULL)
			goto drop;
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			/* handler runs under the QP's receive lock */
			spin_lock_irqsave(&packet->qp->r_lock, flags);
			if (likely((qp_ok(opcode, packet))))
				opcode_handler_tbl[opcode](packet);
			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		}
		/*
		 * Notify rvt_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		/* unicast: QPN lookup is RCU-protected */
		rcu_read_lock();
		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
		if (!packet->qp) {
			rcu_read_unlock();
			goto drop;
		}
		spin_lock_irqsave(&packet->qp->r_lock, flags);
		if (likely((qp_ok(opcode, packet))))
			opcode_handler_tbl[opcode](packet);
		spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		rcu_read_unlock();
	}
	return;

drop:
	ibp->rvp.n_pkt_drops++;
}
468 | ||
/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 *
 * Wakes at most one waiter per tick; if more QPs remain on dev->memwait
 * the timer is re-armed for the next jiffy.
 */
static void mem_timer(unsigned long data)
{
	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
	struct list_head *list = &dev->memwait;
	struct rvt_qp *qp = NULL;
	struct iowait *wait;
	unsigned long flags;
	struct hfi1_qp_priv *priv;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		/* dequeue the first waiter under the lock */
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	/* wake outside the lock; drops the wait-list reference */
	if (qp)
		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
497 | ||
895420dd | 498 | void update_sge(struct rvt_sge_state *ss, u32 length) |
77241056 | 499 | { |
895420dd | 500 | struct rvt_sge *sge = &ss->sge; |
77241056 MM |
501 | |
502 | sge->vaddr += length; | |
503 | sge->length -= length; | |
504 | sge->sge_length -= length; | |
505 | if (sge->sge_length == 0) { | |
506 | if (--ss->num_sge) | |
507 | *sge = *ss->sg_list++; | |
508 | } else if (sge->length == 0 && sge->mr->lkey) { | |
cd4ceee3 | 509 | if (++sge->n >= RVT_SEGSZ) { |
77241056 MM |
510 | if (++sge->m >= sge->mr->mapsz) |
511 | return; | |
512 | sge->n = 0; | |
513 | } | |
514 | sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; | |
515 | sge->length = sge->mr->map[sge->m]->segs[sge->n].length; | |
516 | } | |
517 | } | |
518 | ||
77241056 MM |
/*
 * This is called with progress side lock held.
 *
 * SDMA completion callback for a verbs txreq: reports the send
 * completion to the ULP and returns the txreq to its cache.
 *
 * NOTE(review): 'status' is ignored — completions are reported as
 * IB_WC_SUCCESS regardless of the SDMA status; confirm this is intended.
 */
/* New API */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status)
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
	struct rvt_qp *qp = tx->qp;

	/* plain spin_lock: presumably IRQ context is already handled by
	 * the caller (see "progress side lock held" above) — confirm */
	spin_lock(&qp->s_lock);
	if (tx->wqe)
		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	else if (qp->ibqp.qp_type == IB_QPT_RC) {
		/* RC without a wqe: complete against the saved header */
		struct hfi1_ib_header *hdr;

		hdr = &tx->phdr.hdr;
		hfi1_rc_send_complete(qp, hdr);
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}
544 | ||
711e104d MM |
/*
 * wait_kmem - queue a QP on the device's memory wait list
 * @dev: the verbs device
 * @qp: the QP that could not allocate kernel memory
 * @ps: packet state holding the txreq to park for reuse on wakeup
 *
 * Returns -EBUSY (with RVT_S_BUSY cleared and RVT_S_WAIT_KMEM set) after
 * queuing if the QP can still make progress; 0 if the QP's state no
 * longer allows processing (caller then frees the txreq).
 */
static int wait_kmem(struct hfi1_ibdev *dev,
		     struct rvt_qp *qp,
		     struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		/* iowait_lock nests inside the QP's s_lock */
		write_seqlock(&dev->iowait_lock);
		/* park the txreq so it can be picked up on wakeup */
		list_add_tail(&ps->s_txreq->txreq.list,
			      &priv->s_iowait.tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			/* first memwait entry (re)arms the memory timer */
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
			qp->s_flags |= RVT_S_WAIT_KMEM;
			list_add_tail(&priv->s_iowait.list, &dev->memwait);
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
			/* hold a QP reference while on the wait list */
			atomic_inc(&qp->refcount);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}
574 | ||
/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor
 *
 * Returns 0 on success or the sdma_txadd_kvaddr() error; on error the
 * SGE state is restored to what it was on entry.
 */
static noinline int build_verbs_ulp_payload(
	struct sdma_engine *sde,
	struct rvt_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx)
{
	/* snapshot of the cursor so a failed txadd can be unwound */
	struct rvt_sge *sg_list = ss->sg_list;
	struct rvt_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		/* clamp to both the remaining request and the current SGE */
		len = ss->sge.length;
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
		update_sge(ss, len);
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}
617 | ||
/*
 * Build the number of DMA descriptors needed to send length bytes of data.
 *
 * NOTE: DMA mapping is held in the tx until completed in the ring or
 *       the tx desc is freed without having been submitted to the ring
 *
 * This routine ensures all the helper routine calls succeed.
 *
 * Returns 0 on success or the first sdma_* error encountered.
 */
/* New API */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	struct rvt_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx,
	struct ahg_ib_header *ahdr,
	u64 pbc)
{
	int ret = 0;
	struct hfi1_pio_header *phdr = &tx->phdr;
	u16 hdrbytes = tx->hdr_dwords << 2;

	if (!ahdr->ahgcount) {
		/* no AHG updates: descriptor covers PBC + header + payload */
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			hdrbytes + length,
			ahdr->ahgidx,
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		/* add the header (including PBC) as the first fragment */
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			phdr,
			hdrbytes);
		if (ret)
			goto bail_txadd;
	} else {
		/*
		 * AHG in use: header bytes are not added here; the ahgdesc
		 * updates describe the header changes for this packet.
		 */
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			length,
			ahdr->ahgidx,
			ahdr->ahgcount,
			ahdr->ahgdesc,
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}

	/* add the ulp payload - if any.  ss can be NULL for acks */
	if (ss)
		ret = build_verbs_ulp_payload(sde, ss, length, tx);
bail_txadd:
	return ret;
}
679 | ||
/*
 * hfi1_verbs_send_dma - send the current packet via the SDMA engine
 * @qp: the QP to send on
 * @ps: packet state (txreq, device, port)
 * @pbc: caller-supplied PBC, or 0 to have one built here
 *
 * Returns the sdma_send_txreq() result, 0 when -ECOMM is absorbed
 * (packet treated as "sent"), or the wait_kmem() result if descriptor
 * building failed.
 */
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ahg_ib_header *ahdr = priv->s_hdr;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = qp->s_cur_sge;
	u32 len = qp->s_cur_size;
	u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
	struct hfi1_ibdev *dev = ps->dev;
	struct hfi1_pportdata *ppd = ps->ppd;
	struct verbs_txreq *tx;
	u64 pbc_flags = 0;
	u8 sc5 = priv->s_sc;

	int ret;

	tx = ps->s_txreq;
	/* only build descriptors once; a requeued txreq is already built */
	if (!sdma_txreq_built(&tx->txreq)) {
		if (likely(pbc == 0)) {
			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
			/* No vl15 here */
			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;

			pbc = create_pbc(ppd,
					 pbc_flags,
					 qp->srate_mbps,
					 vl,
					 plen);
		}
		tx->wqe = qp->s_wqe;
		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
		if (unlikely(ret))
			goto bail_build;
	}
	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			   &ps->s_txreq->phdr.hdr);
	ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
	if (unlikely(ret == -ECOMM))
		goto bail_ecomm;
	return ret;

bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
	/* out of descriptors: sleep on kmem, or free the txreq if we can't */
	ret = wait_kmem(dev, qp, ps);
	if (!ret) {
		/* free txreq - bad state */
		hfi1_put_txreq(ps->s_txreq);
		ps->s_txreq = NULL;
	}
	return ret;
}
735 | ||
736 | /* | |
737 | * If we are now in the error state, return zero to flush the | |
738 | * send work request. | |
739 | */ | |
14553ca1 MM |
740 | static int pio_wait(struct rvt_qp *qp, |
741 | struct send_context *sc, | |
742 | struct hfi1_pkt_state *ps, | |
743 | u32 flag) | |
77241056 | 744 | { |
4c6829c5 | 745 | struct hfi1_qp_priv *priv = qp->priv; |
77241056 MM |
746 | struct hfi1_devdata *dd = sc->dd; |
747 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
748 | unsigned long flags; | |
749 | int ret = 0; | |
750 | ||
751 | /* | |
752 | * Note that as soon as want_buffer() is called and | |
753 | * possibly before it returns, sc_piobufavail() | |
754 | * could be called. Therefore, put QP on the I/O wait list before | |
755 | * enabling the PIO avail interrupt. | |
756 | */ | |
757 | spin_lock_irqsave(&qp->s_lock, flags); | |
83693bd1 | 758 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
77241056 | 759 | write_seqlock(&dev->iowait_lock); |
711e104d MM |
760 | list_add_tail(&ps->s_txreq->txreq.list, |
761 | &priv->s_iowait.tx_head); | |
4c6829c5 | 762 | if (list_empty(&priv->s_iowait.list)) { |
77241056 MM |
763 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
764 | int was_empty; | |
765 | ||
14553ca1 MM |
766 | dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); |
767 | dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); | |
77241056 | 768 | dev->n_piowait++; |
14553ca1 | 769 | qp->s_flags |= flag; |
77241056 | 770 | was_empty = list_empty(&sc->piowait); |
4c6829c5 | 771 | list_add_tail(&priv->s_iowait.list, &sc->piowait); |
54d10c1e | 772 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); |
77241056 MM |
773 | atomic_inc(&qp->refcount); |
774 | /* counting: only call wantpiobuf_intr if first user */ | |
775 | if (was_empty) | |
776 | hfi1_sc_wantpiobuf_intr(sc, 1); | |
777 | } | |
778 | write_sequnlock(&dev->iowait_lock); | |
54d10c1e | 779 | qp->s_flags &= ~RVT_S_BUSY; |
77241056 MM |
780 | ret = -EBUSY; |
781 | } | |
782 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
783 | return ret; | |
784 | } | |
785 | ||
895420dd | 786 | struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) |
77241056 MM |
787 | { |
788 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
789 | struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1); | |
790 | u8 vl; | |
791 | ||
792 | vl = sc_to_vlt(dd, sc5); | |
793 | if (vl >= ppd->vls_supported && vl != 15) | |
794 | return NULL; | |
795 | return dd->vld[vl].sc; | |
796 | } | |
797 | ||
14553ca1 MM |
798 | static void verbs_pio_complete(void *arg, int code) |
799 | { | |
800 | struct rvt_qp *qp = (struct rvt_qp *)arg; | |
801 | struct hfi1_qp_priv *priv = qp->priv; | |
802 | ||
803 | if (iowait_pio_dec(&priv->s_iowait)) | |
804 | iowait_drain_wakeup(&priv->s_iowait); | |
805 | } | |
806 | ||
/*
 * hfi1_verbs_send_pio - egress a packet by programmed I/O
 * @qp: the sending QP
 * @ps: packet state (tx request holding the header, plus the port)
 * @pbc: caller-built Per Buffer Control word, or 0 to construct one here
 *
 * Copies PBC + header + payload into a PIO send buffer. Returns 0 when
 * the packet was sent (or completed with error while the link is down),
 * -EINVAL when no send context maps to the QP's SC, or the pio_wait()
 * result when no buffer was available and the request was queued.
 * On the queued path the tx request is consumed by the wait; on every
 * other path it is released here via hfi1_put_txreq().
 */
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = qp->s_cur_sge;
	u32 len = qp->s_cur_size;
	u32 dwords = (len + 3) >> 2;		/* payload rounded up to dwords */
	u32 plen = hdrwords + dwords + 2; /* includes pbc */
	struct hfi1_pportdata *ppd = ps->ppd;
	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
	u64 pbc_flags = 0;
	u8 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
	int ret = 0;
	pio_release_cb cb = NULL;

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}

	/* vl15 special case taken care of in ud.c */
	sc5 = priv->s_sc;
	sc = qp_to_send_context(qp, sc5);

	if (!sc) {
		ret = -EINVAL;
		goto bail;
	}
	if (likely(pbc == 0)) {
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	}
	/* count this send as outstanding before the alloc so the release
	 * callback can never underflow the counter */
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
	if (unlikely(pbuf == NULL)) {
		/* undo the speculative increment above */
		if (cb)
			verbs_pio_complete(qp, 0);
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * If we have filled the PIO buffers to capacity and are
			 * not in an active state this request is not going to
			 * go out to so just complete it with an error or else a
			 * ULP or the core may be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence. The PIO buffs are full
			 * up but we are still happily sending, well we could be
			 * so lets continue to queue the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
			if (!ret)
				/* txreq not queued - free */
				goto bail;
			/* tx consumed in wait */
			return ret;
		}
	}

	if (len == 0) {
		/* header-only packet: single copy including the PBC */
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		if (ss) {
			/* segmented copy: header first, then each SGE chunk */
			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4);
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
				update_sge(ss, slen);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
			seg_pio_copy_end(pbuf);
		}
	}

	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			   &ps->s_txreq->phdr.hdr);

pio_bail:
	/* generate the appropriate completion under the send lock */
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_send_complete(qp, qp->s_wqe, wc_status);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
}
b91cc573 | 924 | |
77241056 MM |
925 | /* |
926 | * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent | |
927 | * being an entry from the ingress partition key table), return 0 | |
928 | * otherwise. Use the matching criteria for egress partition keys | |
929 | * specified in the OPAv1 spec., section 9.1l.7. | |
930 | */ | |
931 | static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) | |
932 | { | |
933 | u16 mkey = pkey & PKEY_LOW_15_MASK; | |
934 | u16 ment = ent & PKEY_LOW_15_MASK; | |
935 | ||
936 | if (mkey == ment) { | |
937 | /* | |
938 | * If pkey[15] is set (full partition member), | |
939 | * is bit 15 in the corresponding table element | |
940 | * clear (limited member)? | |
941 | */ | |
942 | if (pkey & PKEY_MEMBER_MASK) | |
943 | return !!(ent & PKEY_MEMBER_MASK); | |
944 | return 1; | |
945 | } | |
946 | return 0; | |
947 | } | |
948 | ||
/*
 * egress_pkey_check - return 0 if hdr's pkey matches according to the
 * criteria in the OPAv1 spec., section 9.11.7.
 *
 * On a violation, bumps the port's xmit-constraint counter and latches
 * the first offending slid/pkey for the error-info SMA query, then
 * returns 1.
 */
static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
				    struct hfi1_ib_header *hdr,
				    struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_other_headers *ohdr;
	struct hfi1_devdata *dd;
	int i = 0;
	u16 pkey;
	u8 lnh, sc5 = priv->s_sc;

	/* nothing to do if outbound partition enforcement is off */
	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* locate the pkey within the headers */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else
		ohdr = &hdr->u.oth;

	/* pkey is the low 16 bits of BTH word 0 */
	pkey = (u16)be32_to_cpu(ohdr->bth[0]);

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;


	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/* The most likely matching pkey has index qp->s_pkey_index */
	if (unlikely(!egress_pkey_matches_entry(pkey,
					ppd->pkeys[qp->s_pkey_index]))) {
		/* no match - try the entire table */
		for (; i < MAX_PKEY_VALUES; i++) {
			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
				break;
		}
	}

	/* i == MAX_PKEY_VALUES means the full-table scan found no match */
	if (i < MAX_PKEY_VALUES)
		return 0;
bad:
	incr_cntr64(&ppd->port_xmit_constraint_errors);
	dd = ppd->dd;
	/* record only the first offender until the SMA clears the status */
	if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) {
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK;
		dd->err_info_xmit_constraint.slid = slid;
		dd->err_info_xmit_constraint.pkey = pkey;
	}
	return 1;
}
1009 | ||
/**
 * get_send_routine - choose an egress routine
 *
 * Choose an egress routine based on QP type
 * and size
 *
 * Returns the device's PIO send routine for management QPs, for small
 * packets under the piothreshold module knob, and whenever the device
 * has no SDMA engines; otherwise returns the SDMA send routine.
 */
static inline send_routine get_send_routine(struct rvt_qp *qp,
					    struct hfi1_ib_header *h)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;

	/* no SDMA engines at all - PIO is the only option */
	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
		return dd->process_pio_send;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return dd->process_pio_send;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		/* small payloads go PIO when the threshold allows */
		if (piothreshold && qp->s_cur_size <= piothreshold)
			return dd->process_pio_send;
		break;
	case IB_QPT_RC:
		/* PIO only for small eligible opcodes with no SDMA work
		 * still pending on this QP (presumably to keep PIO from
		 * passing in-flight SDMA - confirm against iowait docs) */
		if (piothreshold &&
		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0)
			return dd->process_pio_send;
		break;
	case IB_QPT_UC:
		/* same gating as RC, with the UC opcode mask */
		if (piothreshold &&
		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0)
			return dd->process_pio_send;
		break;
	default:
		break;
	}
	return dd->process_dma_send;
}
1051 | ||
/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
 * @ps: the state of the packet to send
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
 */
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	send_routine sr;
	int ret;

	/* choose PIO vs SDMA first so a pkey failure can be completed
	 * appropriately for the path that would have been taken */
	sr = get_send_routine(qp, &ps->s_txreq->phdr.hdr);
	ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller. Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event. Only do this for PIO. SDMA has its own
		 * mechanism for handling the errors. So for SDMA we can just
		 * return.
		 */
		if (sr == dd->process_pio_send) {
			unsigned long flags;

			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
			hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}
	return sr(qp, ps, 0);
}
1090 | ||
94d5171c HC |
/**
 * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 *
 * Populates rdi.dparms.props from hardware identifiers (OUI, PCI
 * device, minor revision) and the module's tunable limits.
 */
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;

	/* start from a clean slate; only non-zero fields are set below */
	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
	rdi->dparms.props.page_size_cap = PAGE_SIZE;
	/* vendor id is the three OUI bytes packed big-endian style */
	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
	rdi->dparms.props.hw_ver = dd->minrev;
	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
	rdi->dparms.props.max_mr_size = ~0ULL;
	rdi->dparms.props.max_qp = hfi1_max_qps;
	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
	rdi->dparms.props.max_sge = hfi1_max_sges;
	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
	rdi->dparms.props.max_cq = hfi1_max_cqs;
	rdi->dparms.props.max_ah = hfi1_max_ahs;
	rdi->dparms.props.max_cqe = hfi1_max_cqes;
	/* MR/FMR limits track the rdmavt lkey table size */
	rdi->dparms.props.max_mr = rdi->lkey_table.max;
	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
	rdi->dparms.props.max_map_per_fmr = 32767;
	rdi->dparms.props.max_pd = hfi1_max_pds;
	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	rdi->dparms.props.max_qp_init_rd_atom = 255;
	rdi->dparms.props.max_srq = hfi1_max_srqs;
	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	rdi->dparms.props.max_total_mcast_qp_attach =
		rdi->dparms.props.max_mcast_qp_attach *
		rdi->dparms.props.max_mcast_grp;
}
1135 | ||
1136 | static inline u16 opa_speed_to_ib(u16 in) | |
1137 | { | |
1138 | u16 out = 0; | |
1139 | ||
1140 | if (in & OPA_LINK_SPEED_25G) | |
1141 | out |= IB_SPEED_EDR; | |
1142 | if (in & OPA_LINK_SPEED_12_5G) | |
1143 | out |= IB_SPEED_FDR; | |
1144 | ||
1145 | return out; | |
1146 | } | |
1147 | ||
1148 | /* | |
1149 | * Convert a single OPA link width (no multiple flags) to an IB value. | |
1150 | * A zero OPA link width means link down, which means the IB width value | |
1151 | * is a don't care. | |
1152 | */ | |
1153 | static inline u16 opa_width_to_ib(u16 in) | |
1154 | { | |
1155 | switch (in) { | |
1156 | case OPA_LINK_WIDTH_1X: | |
1157 | /* map 2x and 3x to 1x as they don't exist in IB */ | |
1158 | case OPA_LINK_WIDTH_2X: | |
1159 | case OPA_LINK_WIDTH_3X: | |
1160 | return IB_WIDTH_1X; | |
1161 | default: /* link down or unknown, return our largest width */ | |
1162 | case OPA_LINK_WIDTH_4X: | |
1163 | return IB_WIDTH_4X; | |
1164 | } | |
1165 | } | |
1166 | ||
/*
 * query_port - rdmavt query_port_state callback
 * @rdi: the rdmavt device
 * @port_num: 1-based port number
 * @props: filled in with the port's current attributes
 *
 * Translates the OPA port state, width, speed, and MTU into the IB
 * equivalents the core understands. Always returns 0.
 */
static int query_port(struct rvt_dev_info *rdi, u8 port_num,
		      struct ib_port_attr *props)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	u16 lid = ppd->lid;

	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
	props->phys_state = hfi1_ibphys_portstate(ppd);
	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
	/* see rate_show() in ib core/sysfs.c */
	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
	props->max_vl_num = ppd->vls_supported;

	/* Once we are a "first class" citizen and have added the OPA MTUs to
	 * the core we can advertise the larger MTU enum to the ULPs, for now
	 * advertise only 4K.
	 *
	 * Those applications which are either OPA aware or pass the MTU enum
	 * from the Path Records to us will get the new 8k MTU.  Those that
	 * attempt to process the MTU enum may fail in various ways.
	 */
	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
				      4096 : hfi1_max_mtu), IB_MTU_4096);
	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);

	return 0;
}
1201 | ||
/*
 * modify_device - ib_device modify_device callback
 * @device: the IB device
 * @device_modify_mask: which attributes to change
 * @device_modify: the new values
 *
 * Supports changing only the node description and the system image
 * GUID; any other mask bit yields -EOPNOTSUPP. Each port's management
 * agent is notified of the change.
 */
static int modify_device(struct ib_device *device,
			 int device_modify_mask,
			 struct ib_device_modify *device_modify)
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		/* node_desc is a fixed 64-byte field */
		memcpy(device->node_desc, device_modify->node_desc, 64);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_hfi1_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_sys_guid_chg(ibp);
		}
	}

	ret = 0;

bail:
	return ret;
}
1240 | ||
45b59eef | 1241 | static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) |
77241056 | 1242 | { |
45b59eef HC |
1243 | struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); |
1244 | struct hfi1_devdata *dd = dd_from_dev(verbs_dev); | |
1245 | struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; | |
1246 | int ret; | |
77241056 | 1247 | |
45b59eef HC |
1248 | set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, |
1249 | OPA_LINKDOWN_REASON_UNKNOWN); | |
1250 | ret = set_link_state(ppd, HLS_DN_DOWNDEF); | |
77241056 MM |
1251 | return ret; |
1252 | } | |
1253 | ||
25131463 DD |
/*
 * hfi1_get_guid_be - rdmavt get_guid_be callback
 * @rdi: the rdmavt device (unused)
 * @rvp: the rdmavt port
 * @guid_index: which GUID to return
 * @guid: filled in big-endian
 *
 * Index 0 is the port's primary GUID; indices 1..HFI1_GUIDS_PER_PORT-1
 * come from the per-port guids[] table (already big-endian). Returns
 * -EINVAL for an out-of-range index.
 */
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			    int guid_index, __be64 *guid)
{
	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	if (guid_index == 0)
		*guid = cpu_to_be64(ppd->guid);
	else if (guid_index < HFI1_GUIDS_PER_PORT)
		*guid = ibp->guids[guid_index - 1];
	else
		return -EINVAL;

	return 0;
}
1269 | ||
77241056 MM |
1270 | /* |
1271 | * convert ah port,sl to sc | |
1272 | */ | |
1273 | u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah) | |
1274 | { | |
1275 | struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num); | |
1276 | ||
1277 | return ibp->sl_to_sc[ah->sl]; | |
1278 | } | |
1279 | ||
/*
 * hfi1_check_ah - rdmavt check_ah callback
 * Validate that the AH's SL maps (via SC) to a usable VL on the target
 * port; returns -EINVAL otherwise (VL 0xf, management, always passes).
 *
 * NOTE(review): this accepts vl == num_vls while hfi1_notify_new_ah()
 * treats only vl < num_vls as valid - looks like an off-by-one in one
 * of the two; confirm intended boundary before changing.
 */
static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	/* test the mapping for validity */
	ibp = to_iport(ibdev, ah_attr->port_num);
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[ah_attr->sl];
	dd = dd_from_ppd(ppd);
	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
		return -EINVAL;
	return 0;
}
1296 | ||
8f1764fa DD |
/*
 * hfi1_notify_new_ah - rdmavt notify_new_ah callback
 * Cache the VL derived from the AH's SL and, when that VL is valid,
 * the log2 of the VL's MTU, so the hot path need not recompute them.
 */
static void hfi1_notify_new_ah(struct ib_device *ibdev,
			       struct ib_ah_attr *ah_attr,
			       struct rvt_ah *ah)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	/*
	 * Do not trust reading anything from rvt_ah at this point as it is not
	 * done being setup. We can however modify things which we need to set.
	 */

	ibp = to_iport(ibdev, ah_attr->port_num);
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[ah->attr.sl];
	dd = dd_from_ppd(ppd);
	ah->vl = sc_to_vlt(dd, sc5);
	/* only data VLs and VL15 have a configured MTU to derive from */
	if (ah->vl < num_vls || ah->vl == 15)
		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}
1319 | ||
77241056 MM |
/*
 * hfi1_create_qp0_ah - create an address handle on QP0's PD for @dlid
 * @ibp: the sending port
 * @dlid: the destination LID
 *
 * Returns the new AH, or ERR_PTR(-EINVAL) when QP0 does not exist
 * (or ib_create_ah()'s error otherwise).
 */
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
	struct ib_ah_attr attr;
	struct ib_ah *ah = ERR_PTR(-EINVAL);
	struct rvt_qp *qp0;

	memset(&attr, 0, sizeof(attr));
	attr.dlid = dlid;
	attr.port_num = ppd_from_ibp(ibp)->port;
	/* qp[0] is RCU-managed; keep the read lock across the deref and use */
	rcu_read_lock();
	qp0 = rcu_dereference(ibp->rvp.qp[0]);
	if (qp0)
		ah = ib_create_ah(qp0->ibqp.pd, &attr);
	rcu_read_unlock();
	return ah;
}
1336 | ||
77241056 MM |
/**
 * hfi1_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the hfi1_ib device
 */
unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
{
	/* every port has the same pkeys[] layout; port 0's size serves all */
	return ARRAY_SIZE(dd->pport[0].pkeys);
}
1345 | ||
77241056 MM |
1346 | static void init_ibport(struct hfi1_pportdata *ppd) |
1347 | { | |
1348 | struct hfi1_ibport *ibp = &ppd->ibport_data; | |
1349 | size_t sz = ARRAY_SIZE(ibp->sl_to_sc); | |
1350 | int i; | |
1351 | ||
1352 | for (i = 0; i < sz; i++) { | |
1353 | ibp->sl_to_sc[i] = i; | |
1354 | ibp->sc_to_sl[i] = i; | |
1355 | } | |
1356 | ||
4eb06882 | 1357 | spin_lock_init(&ibp->rvp.lock); |
77241056 | 1358 | /* Set the prefix to the default value (see ch. 4.1.1) */ |
4eb06882 DD |
1359 | ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; |
1360 | ibp->rvp.sm_lid = 0; | |
77241056 | 1361 | /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ |
4eb06882 | 1362 | ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | |
77241056 | 1363 | IB_PORT_CAP_MASK_NOTICE_SUP; |
4eb06882 DD |
1364 | ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; |
1365 | ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; | |
1366 | ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; | |
1367 | ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; | |
1368 | ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; | |
1369 | ||
1370 | RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); | |
1371 | RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); | |
77241056 MM |
1372 | } |
1373 | ||
77241056 MM |
1374 | /** |
1375 | * hfi1_register_ib_device - register our device with the infiniband core | |
1376 | * @dd: the device data structure | |
1377 | * Return 0 if successful, errno if unsuccessful. | |
1378 | */ | |
1379 | int hfi1_register_ib_device(struct hfi1_devdata *dd) | |
1380 | { | |
1381 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
ec3f2c12 | 1382 | struct ib_device *ibdev = &dev->rdi.ibdev; |
77241056 | 1383 | struct hfi1_pportdata *ppd = dd->pport; |
895420dd | 1384 | unsigned i; |
77241056 MM |
1385 | int ret; |
1386 | size_t lcpysz = IB_DEVICE_NAME_MAX; | |
77241056 | 1387 | |
77241056 MM |
1388 | for (i = 0; i < dd->num_pports; i++) |
1389 | init_ibport(ppd + i); | |
1390 | ||
1391 | /* Only need to initialize non-zero fields. */ | |
4f87ccfc | 1392 | |
045277cf | 1393 | setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); |
77241056 | 1394 | |
77241056 | 1395 | seqlock_init(&dev->iowait_lock); |
77241056 MM |
1396 | INIT_LIST_HEAD(&dev->txwait); |
1397 | INIT_LIST_HEAD(&dev->memwait); | |
1398 | ||
45842abb MM |
1399 | ret = verbs_txreq_init(dev); |
1400 | if (ret) | |
77241056 | 1401 | goto err_verbs_txreq; |
77241056 MM |
1402 | |
1403 | /* | |
1404 | * The system image GUID is supposed to be the same for all | |
1405 | * HFIs in a single system but since there can be other | |
1406 | * device types in the system, we can't be sure this is unique. | |
1407 | */ | |
1408 | if (!ib_hfi1_sys_image_guid) | |
1409 | ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid); | |
1410 | lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); | |
1411 | strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); | |
1412 | ibdev->owner = THIS_MODULE; | |
1413 | ibdev->node_guid = cpu_to_be64(ppd->guid); | |
77241056 | 1414 | ibdev->phys_port_cnt = dd->num_pports; |
77241056 | 1415 | ibdev->dma_device = &dd->pcidev->dev; |
77241056 | 1416 | ibdev->modify_device = modify_device; |
4331629f DD |
1417 | |
1418 | /* keep process mad in the driver */ | |
77241056 | 1419 | ibdev->process_mad = hfi1_process_mad; |
77241056 MM |
1420 | |
1421 | strncpy(ibdev->node_desc, init_utsname()->nodename, | |
1422 | sizeof(ibdev->node_desc)); | |
1423 | ||
ec3f2c12 DD |
1424 | /* |
1425 | * Fill in rvt info object. | |
1426 | */ | |
1427 | dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; | |
49dbb6cf DD |
1428 | dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name; |
1429 | dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; | |
15723f06 | 1430 | dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; |
8f1764fa | 1431 | dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; |
25131463 | 1432 | dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; |
45b59eef HC |
1433 | dd->verbs_dev.rdi.driver_f.query_port_state = query_port; |
1434 | dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; | |
1435 | dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg; | |
94d5171c HC |
1436 | /* |
1437 | * Fill in rvt info device attributes. | |
1438 | */ | |
1439 | hfi1_fill_device_attr(dd); | |
a2c2d608 DD |
1440 | |
1441 | /* queue pair */ | |
a2c2d608 DD |
1442 | dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; |
1443 | dd->verbs_dev.rdi.dparms.qpn_start = 0; | |
1444 | dd->verbs_dev.rdi.dparms.qpn_inc = 1; | |
1445 | dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; | |
1446 | dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; | |
1447 | dd->verbs_dev.rdi.dparms.qpn_res_end = | |
abd712da | 1448 | dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; |
ec4274f1 DD |
1449 | dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; |
1450 | dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; | |
1451 | dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; | |
1452 | dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; | |
45b59eef HC |
1453 | dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; |
1454 | dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; | |
1455 | ||
a2c2d608 DD |
1456 | dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; |
1457 | dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; | |
1458 | dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; | |
1459 | dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; | |
83693bd1 DD |
1460 | dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; |
1461 | dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; | |
46a80d62 | 1462 | dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; |
ec4274f1 DD |
1463 | dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; |
1464 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1465 | dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; | |
1466 | dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; | |
1467 | dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; | |
1468 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1469 | dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; | |
1470 | dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; | |
1471 | dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; | |
1472 | dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; | |
46a80d62 | 1473 | dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; |
a2c2d608 | 1474 | |
abd712da DD |
1475 | /* completeion queue */ |
1476 | snprintf(dd->verbs_dev.rdi.dparms.cq_name, | |
1477 | sizeof(dd->verbs_dev.rdi.dparms.cq_name), | |
1478 | "hfi1_cq%d", dd->unit); | |
27807392 | 1479 | dd->verbs_dev.rdi.dparms.node = dd->node; |
abd712da | 1480 | |
a2c2d608 | 1481 | /* misc settings */ |
abd712da | 1482 | dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ |
895420dd | 1483 | dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; |
4eb06882 DD |
1484 | dd->verbs_dev.rdi.dparms.nports = dd->num_pports; |
1485 | dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); | |
1486 | ||
1487 | ppd = dd->pport; | |
1488 | for (i = 0; i < dd->num_pports; i++, ppd++) | |
1489 | rvt_init_port(&dd->verbs_dev.rdi, | |
1490 | &ppd->ibport_data.rvp, | |
1491 | i, | |
1492 | ppd->pkeys); | |
ec3f2c12 DD |
1493 | |
1494 | ret = rvt_register_device(&dd->verbs_dev.rdi); | |
77241056 | 1495 | if (ret) |
9c4a311e | 1496 | goto err_verbs_txreq; |
77241056 MM |
1497 | |
1498 | ret = hfi1_verbs_register_sysfs(dd); | |
1499 | if (ret) | |
1500 | goto err_class; | |
1501 | ||
9c4a311e | 1502 | return ret; |
77241056 MM |
1503 | |
1504 | err_class: | |
ec3f2c12 | 1505 | rvt_unregister_device(&dd->verbs_dev.rdi); |
77241056 | 1506 | err_verbs_txreq: |
45842abb | 1507 | verbs_txreq_exit(dev); |
77241056 | 1508 | dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); |
77241056 MM |
1509 | return ret; |
1510 | } | |
1511 | ||
/*
 * hfi1_unregister_ib_device - undo hfi1_register_ib_device
 * @dd: the device data structure
 *
 * Unregisters sysfs and rdmavt, warns if any sender is still parked
 * on a wait list, then stops the memory timer and frees the tx cache.
 */
void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;

	hfi1_verbs_unregister_sysfs(dd);

	rvt_unregister_device(&dd->verbs_dev.rdi);

	/* both wait lists should have drained before teardown */
	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");

	del_timer_sync(&dev->mem_timer);
	verbs_txreq_exit(dev);
}
1528 | ||
77241056 MM |
1529 | void hfi1_cnp_rcv(struct hfi1_packet *packet) |
1530 | { | |
1531 | struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; | |
977940b8 AK |
1532 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); |
1533 | struct hfi1_ib_header *hdr = packet->hdr; | |
895420dd | 1534 | struct rvt_qp *qp = packet->qp; |
977940b8 AK |
1535 | u32 lqpn, rqpn = 0; |
1536 | u16 rlid = 0; | |
1537 | u8 sl, sc5, sc4_bit, svc_type; | |
1538 | bool sc4_set = has_sc4_bit(packet); | |
1539 | ||
1540 | switch (packet->qp->ibqp.qp_type) { | |
1541 | case IB_QPT_UC: | |
1542 | rlid = qp->remote_ah_attr.dlid; | |
1543 | rqpn = qp->remote_qpn; | |
1544 | svc_type = IB_CC_SVCTYPE_UC; | |
1545 | break; | |
1546 | case IB_QPT_RC: | |
1547 | rlid = qp->remote_ah_attr.dlid; | |
1548 | rqpn = qp->remote_qpn; | |
1549 | svc_type = IB_CC_SVCTYPE_RC; | |
1550 | break; | |
1551 | case IB_QPT_SMI: | |
1552 | case IB_QPT_GSI: | |
1553 | case IB_QPT_UD: | |
1554 | svc_type = IB_CC_SVCTYPE_UD; | |
1555 | break; | |
1556 | default: | |
4eb06882 | 1557 | ibp->rvp.n_pkt_drops++; |
977940b8 AK |
1558 | return; |
1559 | } | |
1560 | ||
1561 | sc4_bit = sc4_set << 4; | |
1562 | sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; | |
1563 | sc5 |= sc4_bit; | |
1564 | sl = ibp->sc_to_sl[sc5]; | |
1565 | lqpn = qp->ibqp.qp_num; | |
1566 | ||
1567 | process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); | |
77241056 | 1568 | } |