Commit | Line | Data |
---|---|---|
3ec648c6 | 1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
77241056 | 2 | /* |
19d8b90a | 3 | * Copyright(c) 2015 - 2020 Intel Corporation. |
ddf65f28 | 4 | * Copyright(c) 2021 Cornelis Networks. |
77241056 MM |
5 | */ |
6 | ||
7 | #include <linux/pci.h> | |
8 | #include <linux/netdevice.h> | |
9 | #include <linux/vmalloc.h> | |
10 | #include <linux/delay.h> | |
03b92789 | 11 | #include <linux/xarray.h> |
77241056 MM |
12 | #include <linux/module.h> |
13 | #include <linux/printk.h> | |
14 | #include <linux/hrtimer.h> | |
8737ce95 | 15 | #include <linux/bitmap.h> |
98fa15f3 | 16 | #include <linux/numa.h> |
ec3f2c12 | 17 | #include <rdma/rdma_vt.h> |
77241056 MM |
18 | |
19 | #include "hfi.h" | |
20 | #include "device.h" | |
21 | #include "common.h" | |
6c63e423 | 22 | #include "trace.h" |
77241056 MM |
23 | #include "mad.h" |
24 | #include "sdma.h" | |
25 | #include "debugfs.h" | |
26 | #include "verbs.h" | |
affa48de | 27 | #include "aspm.h" |
4197344b | 28 | #include "affinity.h" |
d4829ea6 | 29 | #include "vnic.h" |
fe4e74ee | 30 | #include "exp_rcv.h" |
4730f4a6 | 31 | #include "netdev.h" |
77241056 MM |
32 | |
33 | #undef pr_fmt | |
34 | #define pr_fmt(fmt) DRIVER_NAME ": " fmt | |
35 | ||
36 | /* | |
37 | * min buffers we want to have per context, after driver | |
38 | */ | |
39 | #define HFI1_MIN_USER_CTXT_BUFCNT 7 | |
40 | ||
77241056 MM |
41 | #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ |
42 | #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ | |
43 | ||
57f97e96 MR |
44 | #define NUM_IB_PORTS 1 |
45 | ||
77241056 MM |
46 | /* |
47 | * Number of user receive contexts we are configured to use (to allow for more | |
48 | * pio buffers per ctxt, etc.) The default of -1 means use the real (non-HT) CPU count. | |
49 | */ | |
2ce6bf22 | 50 | int num_user_contexts = -1; |
5da9e742 | 51 | module_param_named(num_user_contexts, num_user_contexts, int, 0444); |
77241056 | 52 | MODULE_PARM_DESC( |
5da9e742 | 53 | num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)"); |
77241056 | 54 | |
5b55ea3b | 55 | uint krcvqs[RXE_NUM_DATA_VL]; |
77241056 | 56 | int krcvqsset; |
5b55ea3b | 57 | module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); |
82c2611d | 58 | MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); |
77241056 MM |
59 | |
60 | /* computed based on above array */ | |
429b6a72 | 61 | unsigned long n_krcvqs; |
77241056 MM |
62 | |
63 | static unsigned hfi1_rcvarr_split = 25; | |
64 | module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); | |
65 | MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers"); | |
66 | ||
9746fa43 | 67 | static uint eager_buffer_size = (8 << 20); /* 8MB */ |
77241056 | 68 | module_param(eager_buffer_size, uint, S_IRUGO); |
9746fa43 | 69 | MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB"); |
77241056 MM |
70 | |
71 | static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */ | |
72 | module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); | |
73 | MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)"); | |
74 | ||
75 | static uint hfi1_hdrq_entsize = 32; | |
d9a6ce68 MM |
76 | module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444); |
77 | MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)"); | |
77241056 MM |
78 | |
79 | unsigned int user_credit_return_threshold = 33; /* default is 33% */ | |
80 | module_param(user_credit_return_threshold, uint, S_IRUGO); | |
ecb95a02 | 81 | MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); |
77241056 | 82 | |
03b92789 | 83 | DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); |
77241056 | 84 | |
f2a3bc00 MR |
85 | static int hfi1_create_kctxt(struct hfi1_devdata *dd, |
86 | struct hfi1_pportdata *ppd) | |
77241056 | 87 | { |
f2a3bc00 | 88 | struct hfi1_ctxtdata *rcd; |
77241056 | 89 | int ret; |
77241056 | 90 | |
82c2611d NV |
91 | /* Control context has to be always 0 */ |
92 | BUILD_BUG_ON(HFI1_CTRL_CTXT != 0); | |
93 | ||
f2a3bc00 MR |
94 | ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd); |
95 | if (ret < 0) { | |
96 | dd_dev_err(dd, "Kernel receive context allocation failed\n"); | |
97 | return ret; | |
98 | } | |
99 | ||
100 | /* | |
101 | * Set up the kernel context flags here and now because they use | |
102 | * default values for all receive side memories. User contexts will | |
103 | * be handled as they are created. | |
104 | */ | |
105 | rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | | |
106 | HFI1_CAP_KGET(NODROP_RHQ_FULL) | | |
107 | HFI1_CAP_KGET(NODROP_EGR_FULL) | | |
108 | HFI1_CAP_KGET(DMA_RTAIL); | |
109 | ||
110 | /* Control context must use DMA_RTAIL */ | |
111 | if (rcd->ctxt == HFI1_CTRL_CTXT) | |
112 | rcd->flags |= HFI1_CAP_DMA_RTAIL; | |
01c7fc50 MM |
113 | rcd->fast_handler = get_dma_rtail_setting(rcd) ? |
114 | handle_receive_interrupt_dma_rtail : | |
115 | handle_receive_interrupt_nodma_rtail; | |
01c7fc50 | 116 | |
2fb3b5ae | 117 | hfi1_set_seq_cnt(rcd, 1); |
f2a3bc00 MR |
118 | |
119 | rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); | |
120 | if (!rcd->sc) { | |
121 | dd_dev_err(dd, "Kernel send context allocation failed\n"); | |
122 | return -ENOMEM; | |
123 | } | |
124 | hfi1_init_ctxt(rcd->sc); | |
125 | ||
126 | return 0; | |
127 | } | |
128 | ||
129 | /* | |
130 | * Create the receive context array and one or more kernel contexts | |
131 | */ | |
132 | int hfi1_create_kctxts(struct hfi1_devdata *dd) | |
133 | { | |
134 | u16 i; | |
135 | int ret; | |
136 | ||
953a9ceb | 137 | dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd), |
377f111e | 138 | GFP_KERNEL, dd->node); |
806e6e1b | 139 | if (!dd->rcd) |
f2a3bc00 | 140 | return -ENOMEM; |
77241056 | 141 | |
2280740f | 142 | for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { |
f2a3bc00 MR |
143 | ret = hfi1_create_kctxt(dd, dd->pport); |
144 | if (ret) | |
145 | goto bail; | |
77241056 MM |
146 | } |
147 | ||
148 | return 0; | |
f2a3bc00 | 149 | bail: |
f683c80c | 150 | for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) |
d295dbeb | 151 | hfi1_free_ctxt(dd->rcd[i]); |
f683c80c MR |
152 | |
153 | /* All the contexts should be freed, free the array */ | |
77241056 MM |
154 | kfree(dd->rcd); |
155 | dd->rcd = NULL; | |
156 | return ret; | |
157 | } | |
158 | ||
f683c80c | 159 | /* |
d295dbeb | 160 | * Helper routines for the receive context reference count (rcd and uctxt). |
f683c80c MR |
161 | */ |
162 | static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) | |
163 | { | |
164 | kref_init(&rcd->kref); | |
165 | } | |
166 | ||
f2a3bc00 MR |
167 | /** |
168 | * hfi1_rcd_free - When reference is zero clean up. | |
169 | * @kref: pointer to the kref embedded in an initialized rcd data structure | |
170 | * | |
171 | */ | |
f683c80c MR |
172 | static void hfi1_rcd_free(struct kref *kref) |
173 | { | |
d295dbeb | 174 | unsigned long flags; |
f683c80c MR |
175 | struct hfi1_ctxtdata *rcd = |
176 | container_of(kref, struct hfi1_ctxtdata, kref); | |
177 | ||
d295dbeb MR |
178 | spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); |
179 | rcd->dd->rcd[rcd->ctxt] = NULL; | |
180 | spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); | |
181 | ||
bc5add09 MR |
182 | hfi1_free_ctxtdata(rcd->dd, rcd); |
183 | ||
f683c80c MR |
184 | kfree(rcd); |
185 | } | |
186 | ||
f2a3bc00 MR |
187 | /** |
188 | * hfi1_rcd_put - decrement reference for rcd | |
189 | * @rcd: pointer to an initialized rcd data structure | |
190 | * | |
191 | * Use this to put a reference after the init. | |
192 | */ | |
f683c80c MR |
193 | int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) |
194 | { | |
195 | if (rcd) | |
196 | return kref_put(&rcd->kref, hfi1_rcd_free); | |
197 | ||
198 | return 0; | |
199 | } | |
200 | ||
f2a3bc00 MR |
201 | /** |
202 | * hfi1_rcd_get - increment reference for rcd | |
203 | * @rcd: pointer to an initialized rcd data structure | |
204 | * | |
205 | * Use this to get a reference after the init. | |
bc5add09 MR |
206 | * |
207 | * Return : reflect kref_get_unless_zero(), which returns non-zero on | |
208 | * increment, otherwise 0. | |
f2a3bc00 | 209 | */ |
bc5add09 | 210 | int hfi1_rcd_get(struct hfi1_ctxtdata *rcd) |
f683c80c | 211 | { |
bc5add09 | 212 | return kref_get_unless_zero(&rcd->kref); |
f683c80c MR |
213 | } |
214 | ||
f2a3bc00 MR |
215 | /** |
216 | * allocate_rcd_index - allocate an rcd index from the rcd array | |
217 | * @dd: pointer to a valid devdata structure | |
218 | * @rcd: rcd data structure to assign | |
219 | * @index: pointer to index that is allocated | |
220 | * | |
221 | * Find an empty index in the rcd array, and assign the given rcd to it. | |
222 | * If the array is full, -EBUSY is returned. | |
223 | * | |
224 | */ | |
d295dbeb | 225 | static int allocate_rcd_index(struct hfi1_devdata *dd, |
f2a3bc00 MR |
226 | struct hfi1_ctxtdata *rcd, u16 *index) |
227 | { | |
228 | unsigned long flags; | |
229 | u16 ctxt; | |
230 | ||
231 | spin_lock_irqsave(&dd->uctxt_lock, flags); | |
232 | for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++) | |
233 | if (!dd->rcd[ctxt]) | |
234 | break; | |
235 | ||
236 | if (ctxt < dd->num_rcv_contexts) { | |
237 | rcd->ctxt = ctxt; | |
238 | dd->rcd[ctxt] = rcd; | |
239 | hfi1_rcd_init(rcd); | |
240 | } | |
241 | spin_unlock_irqrestore(&dd->uctxt_lock, flags); | |
242 | ||
243 | if (ctxt >= dd->num_rcv_contexts) | |
244 | return -EBUSY; | |
245 | ||
246 | *index = ctxt; | |
247 | ||
248 | return 0; | |
249 | } | |
250 | ||
d59075ad MR |
251 | /** |
252 | * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the | |
253 | * array | |
254 | * @dd: pointer to a valid devdata structure | |
255 | * @ctxt: the index of a possible rcd | |
256 | * | |
257 | * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given | |
258 | * ctxt index is valid. | |
259 | * | |
260 | * The caller is responsible for making the _put(). | |
261 | * | |
262 | */ | |
263 | struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, | |
264 | u16 ctxt) | |
265 | { | |
266 | if (ctxt < dd->num_rcv_contexts) | |
267 | return hfi1_rcd_get_by_index(dd, ctxt); | |
268 | ||
269 | return NULL; | |
270 | } | |
271 | ||
d295dbeb | 272 | /** |
bf194997 | 273 | * hfi1_rcd_get_by_index - get by index |
d295dbeb MR |
274 | * @dd: pointer to a valid devdata structure |
275 | * @ctxt: the index of a possible rcd | |
276 | * | |
277 | * We need to protect access to the rcd array. If access is needed to | |
278 | * one or more index, get the protecting spinlock and then increment the | |
279 | * kref. | |
280 | * | |
281 | * The caller is responsible for making the _put(). | |
282 | * | |
283 | */ | |
284 | struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) | |
285 | { | |
286 | unsigned long flags; | |
287 | struct hfi1_ctxtdata *rcd = NULL; | |
288 | ||
289 | spin_lock_irqsave(&dd->uctxt_lock, flags); | |
290 | if (dd->rcd[ctxt]) { | |
291 | rcd = dd->rcd[ctxt]; | |
bc5add09 MR |
292 | if (!hfi1_rcd_get(rcd)) |
293 | rcd = NULL; | |
d295dbeb MR |
294 | } |
295 | spin_unlock_irqrestore(&dd->uctxt_lock, flags); | |
296 | ||
297 | return rcd; | |
298 | } | |
299 | ||
77241056 | 300 | /* |
d295dbeb MR |
301 | * Common code for user and kernel context create and setup. |
302 | * NOTE: the initial kref is done here (hfi1_rcd_init()). | |
77241056 | 303 | */ |
f2a3bc00 MR |
304 | int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, |
305 | struct hfi1_ctxtdata **context) | |
77241056 MM |
306 | { |
307 | struct hfi1_devdata *dd = ppd->dd; | |
308 | struct hfi1_ctxtdata *rcd; | |
309 | unsigned kctxt_ngroups = 0; | |
310 | u32 base; | |
311 | ||
312 | if (dd->rcv_entries.nctxt_extra > | |
2280740f | 313 | dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) |
77241056 | 314 | kctxt_ngroups = (dd->rcv_entries.nctxt_extra - |
2280740f | 315 | (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)); |
4dfe7cce | 316 | rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); |
77241056 MM |
317 | if (rcd) { |
318 | u32 rcvtids, max_entries; | |
f2a3bc00 MR |
319 | u16 ctxt; |
320 | int ret; | |
77241056 | 321 | |
f2a3bc00 MR |
322 | ret = allocate_rcd_index(dd, rcd, &ctxt); |
323 | if (ret) { | |
324 | *context = NULL; | |
325 | kfree(rcd); | |
326 | return ret; | |
327 | } | |
328 | ||
77241056 | 329 | INIT_LIST_HEAD(&rcd->qp_wait_list); |
c8314811 | 330 | hfi1_exp_tid_group_init(rcd); |
77241056 MM |
331 | rcd->ppd = ppd; |
332 | rcd->dd = dd; | |
957558c9 | 333 | rcd->numa_id = numa; |
77241056 | 334 | rcd->rcv_array_groups = dd->rcv_entries.ngroups; |
b0ba3c18 | 335 | rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; |
f6a3cfec MM |
336 | rcd->slow_handler = handle_receive_interrupt; |
337 | rcd->do_interrupt = rcd->slow_handler; | |
0bae02d5 | 338 | rcd->msix_intr = CCE_NUM_MSIX_VECTORS; |
77241056 | 339 | |
ed71e86a | 340 | mutex_init(&rcd->exp_mutex); |
37356e78 KW |
341 | spin_lock_init(&rcd->exp_lock); |
342 | INIT_LIST_HEAD(&rcd->flow_queue.queue_head); | |
838b6fd2 | 343 | INIT_LIST_HEAD(&rcd->rarr_queue.queue_head); |
77241056 | 344 | |
d2590edc | 345 | hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt); |
d295dbeb | 346 | |
77241056 MM |
347 | /* |
348 | * Calculate the context's RcvArray entry starting point. | |
349 | * We do this here because we have to take into account all | |
350 | * the RcvArray entries that previous contexts would have | |
2280740f VN |
351 | * taken and we have to account for any extra groups assigned |
352 | * to the static (kernel) or dynamic (vnic/user) contexts. | |
77241056 | 353 | */ |
2280740f | 354 | if (ctxt < dd->first_dyn_alloc_ctxt) { |
77241056 MM |
355 | if (ctxt < kctxt_ngroups) { |
356 | base = ctxt * (dd->rcv_entries.ngroups + 1); | |
357 | rcd->rcv_array_groups++; | |
ee495ada | 358 | } else { |
77241056 MM |
359 | base = kctxt_ngroups + |
360 | (ctxt * dd->rcv_entries.ngroups); | |
ee495ada | 361 | } |
77241056 | 362 | } else { |
2280740f | 363 | u16 ct = ctxt - dd->first_dyn_alloc_ctxt; |
77241056 MM |
364 | |
365 | base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) + | |
366 | kctxt_ngroups); | |
367 | if (ct < dd->rcv_entries.nctxt_extra) { | |
368 | base += ct * (dd->rcv_entries.ngroups + 1); | |
369 | rcd->rcv_array_groups++; | |
ee495ada | 370 | } else { |
77241056 MM |
371 | base += dd->rcv_entries.nctxt_extra + |
372 | (ct * dd->rcv_entries.ngroups); | |
ee495ada | 373 | } |
77241056 MM |
374 | } |
375 | rcd->eager_base = base * dd->rcv_entries.group_size; | |
376 | ||
77241056 MM |
377 | rcd->rcvhdrq_cnt = rcvhdrcnt; |
378 | rcd->rcvhdrqentsize = hfi1_hdrq_entsize; | |
40442b30 MM |
379 | rcd->rhf_offset = |
380 | rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32); | |
77241056 MM |
381 | /* |
382 | * Simple Eager buffer allocation: we have already pre-allocated | |
383 | * the number of RcvArray entry groups. Each ctxtdata structure | |
384 | * holds the number of groups for that context. | |
385 | * | |
386 | * To follow CSR requirements and maintain cacheline alignment, | |
387 | * make sure all sizes and bases are multiples of group_size. | |
388 | * | |
389 | * The expected entry count is what is left after assigning | |
390 | * eager. | |
391 | */ | |
392 | max_entries = rcd->rcv_array_groups * | |
393 | dd->rcv_entries.group_size; | |
394 | rcvtids = ((max_entries * hfi1_rcvarr_split) / 100); | |
395 | rcd->egrbufs.count = round_down(rcvtids, | |
396 | dd->rcv_entries.group_size); | |
397 | if (rcd->egrbufs.count > MAX_EAGER_ENTRIES) { | |
398 | dd_dev_err(dd, "ctxt%u: requested too many RcvArray entries.\n", | |
399 | rcd->ctxt); | |
400 | rcd->egrbufs.count = MAX_EAGER_ENTRIES; | |
401 | } | |
6c63e423 | 402 | hfi1_cdbg(PROC, |
d2590edc | 403 | "ctxt%u: max Eager buffer RcvArray entries: %u", |
6c63e423 | 404 | rcd->ctxt, rcd->egrbufs.count); |
77241056 MM |
405 | |
406 | /* | |
407 | * Allocate array that will hold the eager buffer accounting | |
408 | * data. | |
409 | * This will allocate the maximum possible buffer count based | |
410 | * on the value of the RcvArray split parameter. | |
411 | * The resulting value will be rounded down to the closest | |
412 | * multiple of dd->rcv_entries.group_size. | |
413 | */ | |
953a9ceb KA |
414 | rcd->egrbufs.buffers = |
415 | kcalloc_node(rcd->egrbufs.count, | |
416 | sizeof(*rcd->egrbufs.buffers), | |
417 | GFP_KERNEL, numa); | |
77241056 MM |
418 | if (!rcd->egrbufs.buffers) |
419 | goto bail; | |
953a9ceb KA |
420 | rcd->egrbufs.rcvtids = |
421 | kcalloc_node(rcd->egrbufs.count, | |
422 | sizeof(*rcd->egrbufs.rcvtids), | |
423 | GFP_KERNEL, numa); | |
77241056 MM |
424 | if (!rcd->egrbufs.rcvtids) |
425 | goto bail; | |
426 | rcd->egrbufs.size = eager_buffer_size; | |
427 | /* | |
428 | * The size of the buffers programmed into the RcvArray | |
429 | * entries needs to be big enough to handle the highest | |
430 | * MTU supported. | |
431 | */ | |
432 | if (rcd->egrbufs.size < hfi1_max_mtu) { | |
433 | rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); | |
6c63e423 | 434 | hfi1_cdbg(PROC, |
d2590edc | 435 | "ctxt%u: eager bufs size too small. Adjusting to %u", |
77241056 MM |
436 | rcd->ctxt, rcd->egrbufs.size); |
437 | } | |
438 | rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; | |
439 | ||
2280740f VN |
440 | /* Applicable only for statically created kernel contexts */ |
441 | if (ctxt < dd->first_dyn_alloc_ctxt) { | |
b448bf9a SS |
442 | rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), |
443 | GFP_KERNEL, numa); | |
806e6e1b | 444 | if (!rcd->opstats) |
77241056 | 445 | goto bail; |
37356e78 KW |
446 | |
447 | /* Initialize TID flow generations for the context */ | |
448 | hfi1_kern_init_ctxt_generations(rcd); | |
77241056 | 449 | } |
f683c80c | 450 | |
f2a3bc00 MR |
451 | *context = rcd; |
452 | return 0; | |
77241056 | 453 | } |
f2a3bc00 | 454 | |
77241056 | 455 | bail: |
f2a3bc00 | 456 | *context = NULL; |
d295dbeb | 457 | hfi1_free_ctxt(rcd); |
f2a3bc00 MR |
458 | return -ENOMEM; |
459 | } | |
460 | ||
461 | /** | |
bf194997 | 462 | * hfi1_free_ctxt - free context |
f2a3bc00 MR |
463 | * @rcd: pointer to an initialized rcd data structure |
464 | * | |
d295dbeb MR |
465 | * This wrapper is the free function that matches hfi1_create_ctxtdata(). |
466 | * When a context is done being used (kernel or user), this function is called | |
1b8ba6e4 | 467 | * for the "final" put to match the kref init from hfi1_create_ctxtdata(). |
d295dbeb MR |
468 | * Other users of the context do a get/put sequence to make sure that the |
469 | * structure isn't removed while in use. | |
f2a3bc00 | 470 | */ |
d295dbeb | 471 | void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) |
f2a3bc00 | 472 | { |
d295dbeb | 473 | hfi1_rcd_put(rcd); |
77241056 MM |
474 | } |
475 | ||
77241056 MM |
476 | /* |
477 | * Select the largest ccti value over all SLs to determine the intra- | |
478 | * packet gap for the link. | |
479 | * | |
480 | * called with cca_timer_lock held (to protect access to cca_timer | |
481 | * array), and rcu_read_lock() (to protect access to cc_state). | |
482 | */ | |
483 | void set_link_ipg(struct hfi1_pportdata *ppd) | |
484 | { | |
485 | struct hfi1_devdata *dd = ppd->dd; | |
486 | struct cc_state *cc_state; | |
487 | int i; | |
488 | u16 cce, ccti_limit, max_ccti = 0; | |
489 | u16 shift, mult; | |
490 | u64 src; | |
491 | u32 current_egress_rate; /* Mbits /sec */ | |
f93e91a0 | 492 | u64 max_pkt_time; |
77241056 MM |
493 | /* |
494 | * max_pkt_time is the maximum packet egress time in units | |
495 | * of the fabric clock period 1/(805 MHz). | |
496 | */ | |
497 | ||
498 | cc_state = get_cc_state(ppd); | |
499 | ||
d125a6c6 | 500 | if (!cc_state) |
77241056 MM |
501 | /* |
502 | * This should _never_ happen - rcu_read_lock() is held, | |
503 | * and set_link_ipg() should not be called if cc_state | |
504 | * is NULL. | |
505 | */ | |
506 | return; | |
507 | ||
508 | for (i = 0; i < OPA_MAX_SLS; i++) { | |
509 | u16 ccti = ppd->cca_timer[i].ccti; | |
510 | ||
511 | if (ccti > max_ccti) | |
512 | max_ccti = ccti; | |
513 | } | |
514 | ||
515 | ccti_limit = cc_state->cct.ccti_limit; | |
516 | if (max_ccti > ccti_limit) | |
517 | max_ccti = ccti_limit; | |
518 | ||
519 | cce = cc_state->cct.entries[max_ccti].entry; | |
520 | shift = (cce & 0xc000) >> 14; | |
521 | mult = (cce & 0x3fff); | |
522 | ||
523 | current_egress_rate = active_egress_rate(ppd); | |
524 | ||
525 | max_pkt_time = egress_cycles(ppd->ibmaxlen, current_egress_rate); | |
526 | ||
527 | src = (max_pkt_time >> shift) * mult; | |
528 | ||
529 | src &= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK; | |
530 | src <<= SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT; | |
531 | ||
532 | write_csr(dd, SEND_STATIC_RATE_CONTROL, src); | |
533 | } | |
534 | ||
535 | static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) | |
536 | { | |
537 | struct cca_timer *cca_timer; | |
538 | struct hfi1_pportdata *ppd; | |
539 | int sl; | |
d35cf744 | 540 | u16 ccti_timer, ccti_min; |
77241056 | 541 | struct cc_state *cc_state; |
b77d713a | 542 | unsigned long flags; |
d35cf744 | 543 | enum hrtimer_restart ret = HRTIMER_NORESTART; |
77241056 MM |
544 | |
545 | cca_timer = container_of(t, struct cca_timer, hrtimer); | |
546 | ppd = cca_timer->ppd; | |
547 | sl = cca_timer->sl; | |
548 | ||
549 | rcu_read_lock(); | |
550 | ||
551 | cc_state = get_cc_state(ppd); | |
552 | ||
d125a6c6 | 553 | if (!cc_state) { |
77241056 MM |
554 | rcu_read_unlock(); |
555 | return HRTIMER_NORESTART; | |
556 | } | |
557 | ||
558 | /* | |
559 | * 1) decrement ccti for SL | |
560 | * 2) calculate IPG for link (set_link_ipg()) | |
561 | * 3) restart timer, unless ccti is at min value | |
562 | */ | |
563 | ||
564 | ccti_min = cc_state->cong_setting.entries[sl].ccti_min; | |
565 | ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer; | |
566 | ||
b77d713a | 567 | spin_lock_irqsave(&ppd->cca_timer_lock, flags); |
77241056 | 568 | |
d35cf744 | 569 | if (cca_timer->ccti > ccti_min) { |
77241056 MM |
570 | cca_timer->ccti--; |
571 | set_link_ipg(ppd); | |
572 | } | |
573 | ||
d35cf744 | 574 | if (cca_timer->ccti > ccti_min) { |
77241056 MM |
575 | unsigned long nsec = 1024 * ccti_timer; |
576 | /* ccti_timer is in units of 1.024 usec */ | |
577 | hrtimer_forward_now(t, ns_to_ktime(nsec)); | |
d35cf744 | 578 | ret = HRTIMER_RESTART; |
77241056 | 579 | } |
d35cf744 JJ |
580 | |
581 | spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); | |
582 | rcu_read_unlock(); | |
583 | return ret; | |
77241056 MM |
584 | } |
585 | ||
586 | /* | |
587 | * Common code for initializing the physical port structure. | |
588 | */ | |
589 | void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, | |
1fb7f897 | 590 | struct hfi1_devdata *dd, u8 hw_pidx, u32 port) |
77241056 | 591 | { |
8adf71fa | 592 | int i; |
77241056 | 593 | uint default_pkey_idx; |
8adf71fa | 594 | struct cc_state *cc_state; |
77241056 MM |
595 | |
596 | ppd->dd = dd; | |
597 | ppd->hw_pidx = hw_pidx; | |
598 | ppd->port = port; /* IB port number, not index */ | |
07190076 KA |
599 | ppd->prev_link_width = LINK_WIDTH_DEFAULT; |
600 | /* | |
601 | * There are C_VL_COUNT number of PortVLXmitWait counters. | |
602 | * Adding 1 to C_VL_COUNT to include the PortXmitWait counter. | |
603 | */ | |
604 | for (i = 0; i < C_VL_COUNT + 1; i++) { | |
605 | ppd->port_vl_xmit_wait_last[i] = 0; | |
606 | ppd->vl_xmit_flit_cnt[i] = 0; | |
607 | } | |
77241056 MM |
608 | |
609 | default_pkey_idx = 1; | |
610 | ||
611 | ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; | |
53526500 | 612 | ppd->part_enforce |= HFI1_PART_ENFORCE_IN; |
62004871 | 613 | ppd->pkeys[0] = 0x8001; |
77241056 MM |
614 | |
615 | INIT_WORK(&ppd->link_vc_work, handle_verify_cap); | |
616 | INIT_WORK(&ppd->link_up_work, handle_link_up); | |
617 | INIT_WORK(&ppd->link_down_work, handle_link_down); | |
618 | INIT_WORK(&ppd->freeze_work, handle_freeze); | |
619 | INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); | |
620 | INIT_WORK(&ppd->sma_message_work, handle_sma_message); | |
621 | INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); | |
673b975f | 622 | INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link); |
fb9036dd | 623 | INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); |
8ebd4cf1 EH |
624 | INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); |
625 | ||
77241056 | 626 | mutex_init(&ppd->hls_lock); |
77241056 MM |
627 | spin_lock_init(&ppd->qsfp_info.qsfp_lock); |
628 | ||
8ebd4cf1 | 629 | ppd->qsfp_info.ppd = ppd; |
77241056 MM |
630 | ppd->sm_trap_qp = 0x0; |
631 | ppd->sa_qp = 0x1; | |
632 | ||
633 | ppd->hfi1_wq = NULL; | |
634 | ||
635 | spin_lock_init(&ppd->cca_timer_lock); | |
636 | ||
637 | for (i = 0; i < OPA_MAX_SLS; i++) { | |
638 | hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC, | |
639 | HRTIMER_MODE_REL); | |
640 | ppd->cca_timer[i].ppd = ppd; | |
641 | ppd->cca_timer[i].sl = i; | |
642 | ppd->cca_timer[i].ccti = 0; | |
643 | ppd->cca_timer[i].hrtimer.function = cca_timer_fn; | |
644 | } | |
645 | ||
646 | ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT; | |
647 | ||
648 | spin_lock_init(&ppd->cc_state_lock); | |
649 | spin_lock_init(&ppd->cc_log_lock); | |
8adf71fa JX |
650 | cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL); |
651 | RCU_INIT_POINTER(ppd->cc_state, cc_state); | |
652 | if (!cc_state) | |
77241056 MM |
653 | goto bail; |
654 | return; | |
655 | ||
656 | bail: | |
57f97e96 | 657 | dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); |
77241056 MM |
658 | } |
659 | ||
660 | /* | |
661 | * Do initialization for device that is only needed on | |
662 | * first detect, not on resets. | |
663 | */ | |
664 | static int loadtime_init(struct hfi1_devdata *dd) | |
665 | { | |
666 | return 0; | |
667 | } | |
668 | ||
669 | /** | |
670 | * init_after_reset - re-initialize after a reset | |
671 | * @dd: the hfi1_ib device | |
672 | * | |
673 | * sanity check at least some of the values after reset, and | |
674 | * ensure no receive or transmit (explicitly, in case reset | |
675 | * failed) | |
676 | */ | |
677 | static int init_after_reset(struct hfi1_devdata *dd) | |
678 | { | |
679 | int i; | |
d295dbeb | 680 | struct hfi1_ctxtdata *rcd; |
77241056 MM |
681 | /* |
682 | * Ensure chip does no sends or receives, tail updates, or | |
683 | * pioavail updates while we re-initialize. This is mostly | |
684 | * for the driver data structures, not chip registers. | |
685 | */ | |
d295dbeb MR |
686 | for (i = 0; i < dd->num_rcv_contexts; i++) { |
687 | rcd = hfi1_rcd_get_by_index(dd, i); | |
77241056 | 688 | hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | |
2250563e | 689 | HFI1_RCVCTRL_INTRAVAIL_DIS | |
d295dbeb MR |
690 | HFI1_RCVCTRL_TAILUPD_DIS, rcd); |
691 | hfi1_rcd_put(rcd); | |
692 | } | |
77241056 MM |
693 | pio_send_control(dd, PSC_GLOBAL_DISABLE); |
694 | for (i = 0; i < dd->num_send_contexts; i++) | |
695 | sc_disable(dd->send_contexts[i].sc); | |
696 | ||
697 | return 0; | |
698 | } | |
699 | ||
700 | static void enable_chip(struct hfi1_devdata *dd) | |
701 | { | |
d295dbeb | 702 | struct hfi1_ctxtdata *rcd; |
77241056 | 703 | u32 rcvmask; |
e6f7622d | 704 | u16 i; |
77241056 MM |
705 | |
706 | /* enable PIO send */ | |
707 | pio_send_control(dd, PSC_GLOBAL_ENABLE); | |
708 | ||
709 | /* | |
710 | * Enable kernel ctxts' receive and receive interrupt. | |
711 | * Other ctxts done as user opens and initializes them. | |
712 | */ | |
2280740f | 713 | for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { |
d295dbeb MR |
714 | rcd = hfi1_rcd_get_by_index(dd, i); |
715 | if (!rcd) | |
716 | continue; | |
566c157c | 717 | rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; |
d295dbeb | 718 | rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? |
77241056 | 719 | HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; |
d295dbeb | 720 | if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) |
77241056 | 721 | rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; |
d295dbeb | 722 | if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL)) |
77241056 | 723 | rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; |
d295dbeb | 724 | if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL)) |
77241056 | 725 | rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; |
37356e78 KW |
726 | if (HFI1_CAP_IS_KSET(TID_RDMA)) |
727 | rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB; | |
d295dbeb MR |
728 | hfi1_rcvctrl(dd, rcvmask, rcd); |
729 | sc_enable(rcd->sc); | |
730 | hfi1_rcd_put(rcd); | |
77241056 MM |
731 | } |
732 | } | |
733 | ||
734 | /** | |
735 | * create_workqueues - create per port workqueues | |
736 | * @dd: the hfi1_ib device | |
737 | */ | |
738 | static int create_workqueues(struct hfi1_devdata *dd) | |
739 | { | |
740 | int pidx; | |
741 | struct hfi1_pportdata *ppd; | |
742 | ||
743 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
744 | ppd = dd->pport + pidx; | |
745 | if (!ppd->hfi1_wq) { | |
77241056 | 746 | ppd->hfi1_wq = |
0a226edd MM |
747 | alloc_workqueue( |
748 | "hfi%d_%d", | |
4c4b1996 MM |
749 | WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | |
750 | WQ_MEM_RECLAIM, | |
dd1ed108 | 751 | HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, |
0a226edd | 752 | dd->unit, pidx); |
77241056 MM |
753 | if (!ppd->hfi1_wq) |
754 | goto wq_error; | |
755 | } | |
71d47008 SS |
756 | if (!ppd->link_wq) { |
757 | /* | |
758 | * Make the link workqueue single-threaded to enforce | |
759 | * serialization. | |
760 | */ | |
761 | ppd->link_wq = | |
762 | alloc_workqueue( | |
763 | "hfi_link_%d_%d", | |
764 | WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, | |
765 | 1, /* max_active */ | |
766 | dd->unit, pidx); | |
767 | if (!ppd->link_wq) | |
768 | goto wq_error; | |
769 | } | |
77241056 MM |
770 | } |
771 | return 0; | |
772 | wq_error: | |
0a226edd | 773 | pr_err("alloc_workqueue failed for port %d\n", pidx + 1); |
77241056 MM |
774 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { |
775 | ppd = dd->pport + pidx; | |
776 | if (ppd->hfi1_wq) { | |
777 | destroy_workqueue(ppd->hfi1_wq); | |
778 | ppd->hfi1_wq = NULL; | |
779 | } | |
71d47008 SS |
780 | if (ppd->link_wq) { |
781 | destroy_workqueue(ppd->link_wq); | |
782 | ppd->link_wq = NULL; | |
783 | } | |
77241056 MM |
784 | } |
785 | return -ENOMEM; | |
786 | } | |
787 | ||
28b70cd9 KW |
788 | /** |
789 | * destroy_workqueues - destroy per port workqueues | |
790 | * @dd: the hfi1_ib device | |
791 | */ | |
792 | static void destroy_workqueues(struct hfi1_devdata *dd) | |
793 | { | |
794 | int pidx; | |
795 | struct hfi1_pportdata *ppd; | |
796 | ||
797 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
798 | ppd = dd->pport + pidx; | |
799 | ||
800 | if (ppd->hfi1_wq) { | |
801 | destroy_workqueue(ppd->hfi1_wq); | |
802 | ppd->hfi1_wq = NULL; | |
803 | } | |
2315ec12 KW |
804 | if (ppd->link_wq) { |
805 | destroy_workqueue(ppd->link_wq); | |
806 | ppd->link_wq = NULL; | |
807 | } | |
28b70cd9 KW |
808 | } |
809 | } | |
810 | ||
a2f7bbdc MR |
811 | /** |
812 | * enable_general_intr() - Enable the IRQs that will be handled by the | |
813 | * general interrupt handler. | |
814 | * @dd: valid devdata | |
815 | * | |
816 | */ | |
817 | static void enable_general_intr(struct hfi1_devdata *dd) | |
818 | { | |
819 | set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true); | |
820 | set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true); | |
821 | set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true); | |
822 | set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true); | |
823 | set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true); | |
824 | set_intr_bits(dd, IS_DC_START, IS_DC_END, true); | |
825 | set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true); | |
826 | } | |
827 | ||
/**
 * hfi1_init - do the actual initialization sequence on the chip
 * @dd: the hfi1_ib device
 * @reinit: re-initializing, so don't allocate new memory
 *
 * Do the actual initialization sequence on the chip.  This is done
 * both from the init routine called from the PCI infrastructure, and
 * when we reset the chip, or detect that it was reset internally,
 * or it's administratively re-enabled.
 *
 * Memory allocation here and in called routines is only done in
 * the first case (reinit == 0).  We have to be careful, because even
 * without memory allocation, we need to re-write all the chip registers
 * TIDs, etc. after the reset or enable has completed.
 *
 * NOTE(review): the user events and status pages below are allocated
 * regardless of @reinit, which does not match the paragraph above; a
 * call with reinit != 0 would overwrite (and so leak) the earlier pages.
 *
 * Return: 0 on success.  Otherwise the error from loadtime_init() /
 * init_after_reset(), or the error of the last kernel receive context
 * that failed to set up.  Failing to allocate the events or status page
 * is only logged and does not change the return value.
 */
int hfi1_init(struct hfi1_devdata *dd, int reinit)
{
	int ret = 0, pidx, lastfail = 0;
	unsigned long len;
	u16 i;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_pportdata *ppd;

	/* Set up send low level handlers */
	dd->process_pio_send = hfi1_verbs_send_pio;
	dd->process_dma_send = hfi1_verbs_send_dma;
	dd->pio_inline_send = pio_copy;
	dd->process_vnic_dma_send = hfi1_vnic_send_dma;

	/* packet dropping starts out enabled only when is_ax() is true */
	if (is_ax(dd)) {
		atomic_set(&dd->drop_packet, DROP_PACKET_ON);
		dd->do_drop = true;
	} else {
		atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
		dd->do_drop = false;
	}

	/* make sure the link is not "up" */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		ppd->linkup = 0;
	}

	if (reinit)
		ret = init_after_reset(dd);
	else
		ret = loadtime_init(dd);
	if (ret)
		goto done;

	/* dd->rcd can be NULL if early initialization failed */
	for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
		/*
		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
		 * re-init, the simplest way to handle this is to free
		 * existing, and re-allocate.
		 * Need to re-create rest of ctxt 0 ctxtdata as well.
		 */
		rcd = hfi1_rcd_get_by_index(dd, i);
		if (!rcd)
			continue;

		/* each step runs only if the previous one succeeded */
		lastfail = hfi1_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = hfi1_setup_eagerbufs(rcd);
		if (!lastfail)
			lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
		if (lastfail) {
			dd_dev_err(dd,
				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
			/* remember the failure but keep going with the rest */
			ret = lastfail;
		}
		/* done with the rcd obtained from hfi1_rcd_get_by_index() */
		hfi1_rcd_put(rcd);
	}

	/* Allocate enough memory for user event notification. */
	len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
			 sizeof(*dd->events));
	dd->events = vmalloc_user(len);
	if (!dd->events)
		dd_dev_err(dd, "Failed to allocate user events page\n");
	/*
	 * Allocate a page for device and port status.
	 * Page will be shared amongst all user processes.
	 */
	dd->status = vmalloc_user(PAGE_SIZE);
	if (!dd->status)
		dd_dev_err(dd, "Failed to allocate dev status page\n");
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (dd->status)
			/* Currently, we only have one port */
			ppd->statusp = &dd->status->port;

		set_mtu(ppd);
	}

	/* enable chip even if we have an error, so we can debug cause */
	enable_chip(dd);

done:
	/*
	 * Set status even if port serdes is not initialized
	 * so that diags will work.
	 */
	if (dd->status)
		dd->status->dev |= HFI1_STATUS_CHIP_PRESENT |
			HFI1_STATUS_INITTED;
	if (!ret) {
		/* enable all interrupts from the chip */
		enable_general_intr(dd);
		init_qsfp_int(dd);

		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;

			/*
			 * start the serdes - must be after interrupts are
			 * enabled so we are notified when the link goes up
			 */
			lastfail = bringup_serdes(ppd);
			if (lastfail)
				dd_dev_info(dd,
					    "Failed to bring up port %u\n",
					    ppd->port);

			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			if (ppd->statusp)
				*ppd->statusp |= HFI1_STATUS_CHIP_PRESENT |
							HFI1_STATUS_INITTED;
			/*
			 * NOTE(review): nothing follows this check in the
			 * loop body, so the continue has no effect.
			 */
			if (!ppd->link_speed_enabled)
				continue;
		}
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}
971 | ||
77241056 MM |
972 | struct hfi1_devdata *hfi1_lookup(int unit) |
973 | { | |
03b92789 | 974 | return xa_load(&hfi1_dev_table, unit); |
77241056 MM |
975 | } |
976 | ||
977 | /* | |
978 | * Stop the timers during unit shutdown, or after an error late | |
979 | * in initialization. | |
980 | */ | |
981 | static void stop_timers(struct hfi1_devdata *dd) | |
982 | { | |
983 | struct hfi1_pportdata *ppd; | |
984 | int pidx; | |
985 | ||
986 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
987 | ppd = dd->pport + pidx; | |
8064135e | 988 | if (ppd->led_override_timer.function) { |
77241056 MM |
989 | del_timer_sync(&ppd->led_override_timer); |
990 | atomic_set(&ppd->led_override_timer_active, 0); | |
991 | } | |
992 | } | |
993 | } | |
994 | ||
995 | /** | |
996 | * shutdown_device - shut down a device | |
997 | * @dd: the hfi1_ib device | |
998 | * | |
999 | * This is called to make the device quiet when we are about to | |
1000 | * unload the driver, and also when the device is administratively | |
1001 | * disabled. It does not free any data structures. | |
1002 | * Everything it does has to be setup again by hfi1_init(dd, 1) | |
1003 | */ | |
1004 | static void shutdown_device(struct hfi1_devdata *dd) | |
1005 | { | |
1006 | struct hfi1_pportdata *ppd; | |
d295dbeb | 1007 | struct hfi1_ctxtdata *rcd; |
77241056 MM |
1008 | unsigned pidx; |
1009 | int i; | |
1010 | ||
8d3e7113 AE |
1011 | if (dd->flags & HFI1_SHUTDOWN) |
1012 | return; | |
1013 | dd->flags |= HFI1_SHUTDOWN; | |
1014 | ||
77241056 MM |
1015 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { |
1016 | ppd = dd->pport + pidx; | |
1017 | ||
1018 | ppd->linkup = 0; | |
1019 | if (ppd->statusp) | |
1020 | *ppd->statusp &= ~(HFI1_STATUS_IB_CONF | | |
1021 | HFI1_STATUS_IB_READY); | |
1022 | } | |
1023 | dd->flags &= ~HFI1_INITTED; | |
1024 | ||
a2f7bbdc MR |
1025 | /* mask and clean up interrupts */ |
1026 | set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false); | |
6eb4eb10 | 1027 | msix_clean_up_interrupts(dd); |
77241056 MM |
1028 | |
1029 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
d295dbeb MR |
1030 | for (i = 0; i < dd->num_rcv_contexts; i++) { |
1031 | rcd = hfi1_rcd_get_by_index(dd, i); | |
77241056 | 1032 | hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | |
2250563e MR |
1033 | HFI1_RCVCTRL_CTXT_DIS | |
1034 | HFI1_RCVCTRL_INTRAVAIL_DIS | | |
1035 | HFI1_RCVCTRL_PKEY_DIS | | |
d295dbeb MR |
1036 | HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd); |
1037 | hfi1_rcd_put(rcd); | |
1038 | } | |
77241056 MM |
1039 | /* |
1040 | * Gracefully stop all sends allowing any in progress to | |
1041 | * trickle out first. | |
1042 | */ | |
1043 | for (i = 0; i < dd->num_send_contexts; i++) | |
1044 | sc_flush(dd->send_contexts[i].sc); | |
1045 | } | |
1046 | ||
1047 | /* | |
1048 | * Enough for anything that's going to trickle out to have actually | |
1049 | * done so. | |
1050 | */ | |
1051 | udelay(20); | |
1052 | ||
1053 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
1054 | ppd = dd->pport + pidx; | |
1055 | ||
1056 | /* disable all contexts */ | |
1057 | for (i = 0; i < dd->num_send_contexts; i++) | |
1058 | sc_disable(dd->send_contexts[i].sc); | |
1059 | /* disable the send device */ | |
1060 | pio_send_control(dd, PSC_GLOBAL_DISABLE); | |
1061 | ||
91ab4ed3 EH |
1062 | shutdown_led_override(ppd); |
1063 | ||
77241056 MM |
1064 | /* |
1065 | * Clear SerdesEnable. | |
1066 | * We can't count on interrupts since we are stopping. | |
1067 | */ | |
1068 | hfi1_quiet_serdes(ppd); | |
28b70cd9 KW |
1069 | if (ppd->hfi1_wq) |
1070 | flush_workqueue(ppd->hfi1_wq); | |
2315ec12 | 1071 | if (ppd->link_wq) |
28b70cd9 | 1072 | flush_workqueue(ppd->link_wq); |
77241056 MM |
1073 | } |
1074 | sdma_exit(dd); | |
1075 | } | |
1076 | ||
1077 | /** | |
1078 | * hfi1_free_ctxtdata - free a context's allocated data | |
1079 | * @dd: the hfi1_ib device | |
1080 | * @rcd: the ctxtdata structure | |
1081 | * | |
1082 | * free up any allocated data for a context | |
77241056 MM |
1083 | * It should never change any chip state, or global driver state. |
1084 | */ | |
1085 | void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) | |
1086 | { | |
f683c80c | 1087 | u32 e; |
77241056 MM |
1088 | |
1089 | if (!rcd) | |
1090 | return; | |
1091 | ||
1092 | if (rcd->rcvhdrq) { | |
b2578431 | 1093 | dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), |
60368186 | 1094 | rcd->rcvhdrq, rcd->rcvhdrq_dma); |
77241056 | 1095 | rcd->rcvhdrq = NULL; |
2fb3b5ae | 1096 | if (hfi1_rcvhdrtail_kvaddr(rcd)) { |
77241056 | 1097 | dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, |
2fb3b5ae | 1098 | (void *)hfi1_rcvhdrtail_kvaddr(rcd), |
60368186 | 1099 | rcd->rcvhdrqtailaddr_dma); |
77241056 MM |
1100 | rcd->rcvhdrtail_kvaddr = NULL; |
1101 | } | |
1102 | } | |
1103 | ||
1104 | /* all the RcvArray entries should have been cleared by now */ | |
1105 | kfree(rcd->egrbufs.rcvtids); | |
f683c80c | 1106 | rcd->egrbufs.rcvtids = NULL; |
77241056 MM |
1107 | |
1108 | for (e = 0; e < rcd->egrbufs.alloced; e++) { | |
9292f8f9 | 1109 | if (rcd->egrbufs.buffers[e].addr) |
77241056 MM |
1110 | dma_free_coherent(&dd->pcidev->dev, |
1111 | rcd->egrbufs.buffers[e].len, | |
1112 | rcd->egrbufs.buffers[e].addr, | |
60368186 | 1113 | rcd->egrbufs.buffers[e].dma); |
77241056 MM |
1114 | } |
1115 | kfree(rcd->egrbufs.buffers); | |
f683c80c MR |
1116 | rcd->egrbufs.alloced = 0; |
1117 | rcd->egrbufs.buffers = NULL; | |
77241056 MM |
1118 | |
1119 | sc_free(rcd->sc); | |
f683c80c MR |
1120 | rcd->sc = NULL; |
1121 | ||
77241056 MM |
1122 | vfree(rcd->subctxt_uregbase); |
1123 | vfree(rcd->subctxt_rcvegrbuf); | |
1124 | vfree(rcd->subctxt_rcvhdr_base); | |
77241056 | 1125 | kfree(rcd->opstats); |
f683c80c MR |
1126 | |
1127 | rcd->subctxt_uregbase = NULL; | |
1128 | rcd->subctxt_rcvegrbuf = NULL; | |
1129 | rcd->subctxt_rcvhdr_base = NULL; | |
1130 | rcd->opstats = NULL; | |
77241056 MM |
1131 | } |
1132 | ||
78eb129d DL |
1133 | /* |
1134 | * Release our hold on the shared asic data. If we are the last one, | |
dba715f0 | 1135 | * return the structure to be finalized outside the lock. Must be |
03b92789 | 1136 | * holding hfi1_dev_table lock. |
78eb129d | 1137 | */ |
dba715f0 | 1138 | static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) |
78eb129d | 1139 | { |
dba715f0 | 1140 | struct hfi1_asic_data *ad; |
78eb129d DL |
1141 | int other; |
1142 | ||
1143 | if (!dd->asic_data) | |
dba715f0 | 1144 | return NULL; |
78eb129d DL |
1145 | dd->asic_data->dds[dd->hfi1_id] = NULL; |
1146 | other = dd->hfi1_id ? 0 : 1; | |
dba715f0 | 1147 | ad = dd->asic_data; |
78eb129d | 1148 | dd->asic_data = NULL; |
dba715f0 DL |
1149 | /* return NULL if the other dd still has a link */ |
1150 | return ad->dds[other] ? NULL : ad; | |
1151 | } | |
1152 | ||
/*
 * Clean up the i2c state tied to @ad, then free @ad.
 *
 * hfi1_free_devdata() passes in whatever release_asic_data() returned,
 * which is NULL when there is nothing to finalize.
 * NOTE(review): that relies on clean_up_i2c() accepting a NULL @ad -
 * confirm.
 */
static void finalize_asic_data(struct hfi1_devdata *dd,
			       struct hfi1_asic_data *ad)
{
	clean_up_i2c(dd, ad);
	kfree(ad);
}
1159 | ||
e9777ad4 | 1160 | /** |
5ab17a24 | 1161 | * hfi1_free_devdata - cleans up and frees per-unit data structure |
e9777ad4 SS |
1162 | * @dd: pointer to a valid devdata structure |
1163 | * | |
5ab17a24 | 1164 | * It cleans up and frees all data structures set up by |
e9777ad4 SS |
1165 | * by hfi1_alloc_devdata(). |
1166 | */ | |
5ab17a24 | 1167 | void hfi1_free_devdata(struct hfi1_devdata *dd) |
77241056 | 1168 | { |
dba715f0 | 1169 | struct hfi1_asic_data *ad; |
77241056 MM |
1170 | unsigned long flags; |
1171 | ||
03b92789 MW |
1172 | xa_lock_irqsave(&hfi1_dev_table, flags); |
1173 | __xa_erase(&hfi1_dev_table, dd->unit); | |
dba715f0 | 1174 | ad = release_asic_data(dd); |
03b92789 | 1175 | xa_unlock_irqrestore(&hfi1_dev_table, flags); |
e9777ad4 SS |
1176 | |
1177 | finalize_asic_data(dd, ad); | |
c3838b39 | 1178 | free_platform_config(dd); |
77241056 MM |
1179 | rcu_barrier(); /* wait for rcu callbacks to complete */ |
1180 | free_percpu(dd->int_counter); | |
1181 | free_percpu(dd->rcv_limit); | |
89abfc8d | 1182 | free_percpu(dd->send_schedule); |
1b311f89 | 1183 | free_percpu(dd->tx_opstats); |
e9777ad4 SS |
1184 | dd->int_counter = NULL; |
1185 | dd->rcv_limit = NULL; | |
1186 | dd->send_schedule = NULL; | |
1187 | dd->tx_opstats = NULL; | |
5d18ee67 SS |
1188 | kfree(dd->comp_vect); |
1189 | dd->comp_vect = NULL; | |
60a8b5a1 MM |
1190 | if (dd->rcvhdrtail_dummy_kvaddr) |
1191 | dma_free_coherent(&dd->pcidev->dev, sizeof(u64), | |
1192 | (void *)dd->rcvhdrtail_dummy_kvaddr, | |
1193 | dd->rcvhdrtail_dummy_dma); | |
1194 | dd->rcvhdrtail_dummy_kvaddr = NULL; | |
473291b3 | 1195 | sdma_clean(dd, dd->num_sdma); |
ea0e4ce3 | 1196 | rvt_dealloc_device(&dd->verbs_dev.rdi); |
77241056 MM |
1197 | } |
1198 | ||
57f97e96 MR |
1199 | /** |
1200 | * hfi1_alloc_devdata - Allocate our primary per-unit data structure. | |
1201 | * @pdev: Valid PCI device | |
1202 | * @extra: How many bytes to alloc past the default | |
1203 | * | |
1204 | * Must be done via verbs allocator, because the verbs cleanup process | |
1205 | * both does cleanup and free of the data structure. | |
77241056 | 1206 | * "extra" is for chip-specific data. |
77241056 | 1207 | */ |
57f97e96 MR |
1208 | static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, |
1209 | size_t extra) | |
77241056 | 1210 | { |
77241056 | 1211 | struct hfi1_devdata *dd; |
7af6d006 | 1212 | int ret, nports; |
77241056 | 1213 | |
7af6d006 DD |
1214 | /* extra is * number of ports */ |
1215 | nports = extra / sizeof(struct hfi1_pportdata); | |
77241056 | 1216 | |
7af6d006 DD |
1217 | dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra, |
1218 | nports); | |
77241056 MM |
1219 | if (!dd) |
1220 | return ERR_PTR(-ENOMEM); | |
7af6d006 | 1221 | dd->num_pports = nports; |
77241056 | 1222 | dd->pport = (struct hfi1_pportdata *)(dd + 1); |
45d92457 SS |
1223 | dd->pcidev = pdev; |
1224 | pci_set_drvdata(pdev, dd); | |
77241056 | 1225 | |
03b92789 MW |
1226 | ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, |
1227 | GFP_KERNEL); | |
77241056 | 1228 | if (ret < 0) { |
57f97e96 MR |
1229 | dev_err(&pdev->dev, |
1230 | "Could not allocate unit ID: error %d\n", -ret); | |
77241056 MM |
1231 | goto bail; |
1232 | } | |
5084c8ff | 1233 | rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); |
5de61a47 MM |
1234 | /* |
1235 | * If the BIOS does not have the NUMA node information set, select | |
1236 | * NUMA 0 so we get consistent performance. | |
1237 | */ | |
1238 | dd->node = pcibus_to_node(pdev->bus); | |
1239 | if (dd->node == NUMA_NO_NODE) { | |
1240 | dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n"); | |
1241 | dd->node = 0; | |
1242 | } | |
5084c8ff | 1243 | |
77241056 MM |
1244 | /* |
1245 | * Initialize all locks for the device. This needs to be as early as | |
1246 | * possible so locks are usable. | |
1247 | */ | |
1248 | spin_lock_init(&dd->sc_lock); | |
1249 | spin_lock_init(&dd->sendctrl_lock); | |
1250 | spin_lock_init(&dd->rcvctrl_lock); | |
1251 | spin_lock_init(&dd->uctxt_lock); | |
1252 | spin_lock_init(&dd->hfi1_diag_trans_lock); | |
1253 | spin_lock_init(&dd->sc_init_lock); | |
77241056 | 1254 | spin_lock_init(&dd->dc8051_memlock); |
77241056 MM |
1255 | seqlock_init(&dd->sc2vl_lock); |
1256 | spin_lock_init(&dd->sde_map_lock); | |
35f6befc | 1257 | spin_lock_init(&dd->pio_map_lock); |
22546b74 | 1258 | mutex_init(&dd->dc8051_lock); |
77241056 | 1259 | init_waitqueue_head(&dd->event_queue); |
a2f7bbdc | 1260 | spin_lock_init(&dd->irq_src_lock); |
77241056 MM |
1261 | |
1262 | dd->int_counter = alloc_percpu(u64); | |
1263 | if (!dd->int_counter) { | |
1264 | ret = -ENOMEM; | |
77241056 MM |
1265 | goto bail; |
1266 | } | |
1267 | ||
1268 | dd->rcv_limit = alloc_percpu(u64); | |
1269 | if (!dd->rcv_limit) { | |
1270 | ret = -ENOMEM; | |
77241056 MM |
1271 | goto bail; |
1272 | } | |
1273 | ||
89abfc8d VM |
1274 | dd->send_schedule = alloc_percpu(u64); |
1275 | if (!dd->send_schedule) { | |
1276 | ret = -ENOMEM; | |
89abfc8d VM |
1277 | goto bail; |
1278 | } | |
1279 | ||
1b311f89 MM |
1280 | dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx); |
1281 | if (!dd->tx_opstats) { | |
1282 | ret = -ENOMEM; | |
1283 | goto bail; | |
1284 | } | |
1285 | ||
5d18ee67 SS |
1286 | dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); |
1287 | if (!dd->comp_vect) { | |
1288 | ret = -ENOMEM; | |
1289 | goto bail; | |
1290 | } | |
1291 | ||
60a8b5a1 MM |
1292 | /* allocate dummy tail memory for all receive contexts */ |
1293 | dd->rcvhdrtail_dummy_kvaddr = | |
1294 | dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), | |
1295 | &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); | |
1296 | if (!dd->rcvhdrtail_dummy_kvaddr) { | |
1297 | ret = -ENOMEM; | |
1298 | goto bail; | |
1299 | } | |
1300 | ||
19d8b90a | 1301 | atomic_set(&dd->ipoib_rsm_usr_num, 0); |
77241056 MM |
1302 | return dd; |
1303 | ||
1304 | bail: | |
5ab17a24 | 1305 | hfi1_free_devdata(dd); |
77241056 MM |
1306 | return ERR_PTR(ret); |
1307 | } | |
1308 | ||
1309 | /* | |
1310 | * Called from freeze mode handlers, and from PCI error | |
1311 | * reporting code. Should be paranoid about state of | |
1312 | * system and data structures. | |
1313 | */ | |
1314 | void hfi1_disable_after_error(struct hfi1_devdata *dd) | |
1315 | { | |
1316 | if (dd->flags & HFI1_INITTED) { | |
1317 | u32 pidx; | |
1318 | ||
1319 | dd->flags &= ~HFI1_INITTED; | |
1320 | if (dd->pport) | |
1321 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
1322 | struct hfi1_pportdata *ppd; | |
1323 | ||
1324 | ppd = dd->pport + pidx; | |
1325 | if (dd->flags & HFI1_PRESENT) | |
1326 | set_link_state(ppd, HLS_DN_DISABLE); | |
1327 | ||
1328 | if (ppd->statusp) | |
1329 | *ppd->statusp &= ~HFI1_STATUS_IB_READY; | |
1330 | } | |
1331 | } | |
1332 | ||
1333 | /* | |
1334 | * Mark as having had an error for driver, and also | |
1335 | * for /sys and status word mapped to user programs. | |
1336 | * This marks unit as not usable, until reset. | |
1337 | */ | |
1338 | if (dd->status) | |
1339 | dd->status->dev |= HFI1_STATUS_HWERROR; | |
1340 | } | |
1341 | ||
/* forward declarations for the callbacks wired into hfi1_pci_driver below */
static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
static void shutdown_one(struct pci_dev *);

/* message prefixes built from DRIVER_NAME */
#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "

/* PCI device IDs handled by this driver; the zeroed entry ends the table */
const struct pci_device_id hfi1_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) },
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, hfi1_pci_tbl);

/* driver description passed to pci_register_driver() in hfi1_mod_init() */
static struct pci_driver hfi1_pci_driver = {
	.name = DRIVER_NAME,
	.probe = init_one,
	.remove = remove_one,
	.shutdown = shutdown_one,
	.id_table = hfi1_pci_tbl,
	.err_handler = &hfi1_pci_err_handler,
};
1365 | ||
1366 | static void __init compute_krcvqs(void) | |
1367 | { | |
1368 | int i; | |
1369 | ||
1370 | for (i = 0; i < krcvqsset; i++) | |
1371 | n_krcvqs += krcvqs[i]; | |
1372 | } | |
1373 | ||
1374 | /* | |
1375 | * Do all the generic driver unit- and chip-independent memory | |
1376 | * allocation and initialization. | |
1377 | */ | |
1378 | static int __init hfi1_mod_init(void) | |
1379 | { | |
1380 | int ret; | |
1381 | ||
1382 | ret = dev_init(); | |
1383 | if (ret) | |
1384 | goto bail; | |
1385 | ||
d6373019 SS |
1386 | ret = node_affinity_init(); |
1387 | if (ret) | |
1388 | goto bail; | |
4197344b | 1389 | |
77241056 MM |
1390 | /* validate max MTU before any devices start */ |
1391 | if (!valid_opa_max_mtu(hfi1_max_mtu)) { | |
1392 | pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n", | |
1393 | hfi1_max_mtu, HFI1_DEFAULT_MAX_MTU); | |
1394 | hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; | |
1395 | } | |
1396 | /* valid CUs run from 1-128 in powers of 2 */ | |
1397 | if (hfi1_cu > 128 || !is_power_of_2(hfi1_cu)) | |
1398 | hfi1_cu = 1; | |
1399 | /* valid credit return threshold is 0-100, variable is unsigned */ | |
1400 | if (user_credit_return_threshold > 100) | |
1401 | user_credit_return_threshold = 100; | |
1402 | ||
1403 | compute_krcvqs(); | |
4d114fdd JJ |
1404 | /* |
1405 | * sanitize receive interrupt count, time must wait until after | |
1406 | * the hardware type is known | |
1407 | */ | |
77241056 MM |
1408 | if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK) |
1409 | rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK; | |
1410 | /* reject invalid combinations */ | |
1411 | if (rcv_intr_count == 0 && rcv_intr_timeout == 0) { | |
1412 | pr_err("Invalid mode: both receive interrupt count and available timeout are zero - setting interrupt count to 1\n"); | |
1413 | rcv_intr_count = 1; | |
1414 | } | |
1415 | if (rcv_intr_count > 1 && rcv_intr_timeout == 0) { | |
1416 | /* | |
1417 | * Avoid indefinite packet delivery by requiring a timeout | |
1418 | * if count is > 1. | |
1419 | */ | |
1420 | pr_err("Invalid mode: receive interrupt count greater than 1 and available timeout is zero - setting available timeout to 1\n"); | |
1421 | rcv_intr_timeout = 1; | |
1422 | } | |
1423 | if (rcv_intr_dynamic && !(rcv_intr_count > 1 && rcv_intr_timeout > 0)) { | |
1424 | /* | |
1425 | * The dynamic algorithm expects a non-zero timeout | |
1426 | * and a count > 1. | |
1427 | */ | |
1428 | pr_err("Invalid mode: dynamic receive interrupt mitigation with invalid count and timeout - turning dynamic off\n"); | |
1429 | rcv_intr_dynamic = 0; | |
1430 | } | |
1431 | ||
1432 | /* sanitize link CRC options */ | |
1433 | link_crc_mask &= SUPPORTED_CRCS; | |
1434 | ||
48a615dc KW |
1435 | ret = opfn_init(); |
1436 | if (ret < 0) { | |
1437 | pr_err("Failed to allocate opfn_wq"); | |
1438 | goto bail_dev; | |
1439 | } | |
1440 | ||
77241056 MM |
1441 | /* |
1442 | * These must be called before the driver is registered with | |
1443 | * the PCI subsystem. | |
1444 | */ | |
77241056 MM |
1445 | hfi1_dbg_init(); |
1446 | ret = pci_register_driver(&hfi1_pci_driver); | |
1447 | if (ret < 0) { | |
1448 | pr_err("Unable to register driver: error %d\n", -ret); | |
1449 | goto bail_dev; | |
1450 | } | |
1451 | goto bail; /* all OK */ | |
1452 | ||
1453 | bail_dev: | |
1454 | hfi1_dbg_exit(); | |
77241056 MM |
1455 | dev_cleanup(); |
1456 | bail: | |
1457 | return ret; | |
1458 | } | |
1459 | ||
1460 | module_init(hfi1_mod_init); | |
1461 | ||
/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 *
 * The PCI driver is unregistered first; the module-wide teardown calls
 * follow.  By the time the table is checked no unit should be left in
 * hfi1_dev_table, hence the warning if it is not empty.
 */
static void __exit hfi1_mod_cleanup(void)
{
	pci_unregister_driver(&hfi1_pci_driver);
	opfn_exit();
	node_affinity_destroy_all();
	hfi1_dbg_exit();

	WARN_ON(!xa_empty(&hfi1_dev_table));
	dispose_firmware();	/* asymmetric with obtain_firmware() */
	dev_cleanup();
}

module_exit(hfi1_mod_cleanup);
1478 | ||
1479 | /* this can only be called after a successful initialization */ | |
1480 | static void cleanup_device_data(struct hfi1_devdata *dd) | |
1481 | { | |
1482 | int ctxt; | |
1483 | int pidx; | |
77241056 MM |
1484 | |
1485 | /* users can't do anything more with chip */ | |
1486 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { | |
1487 | struct hfi1_pportdata *ppd = &dd->pport[pidx]; | |
1488 | struct cc_state *cc_state; | |
1489 | int i; | |
1490 | ||
1491 | if (ppd->statusp) | |
1492 | *ppd->statusp &= ~HFI1_STATUS_CHIP_PRESENT; | |
1493 | ||
1494 | for (i = 0; i < OPA_MAX_SLS; i++) | |
1495 | hrtimer_cancel(&ppd->cca_timer[i].hrtimer); | |
1496 | ||
1497 | spin_lock(&ppd->cc_state_lock); | |
8adf71fa | 1498 | cc_state = get_cc_state_protected(ppd); |
eea57078 | 1499 | RCU_INIT_POINTER(ppd->cc_state, NULL); |
77241056 MM |
1500 | spin_unlock(&ppd->cc_state_lock); |
1501 | ||
1502 | if (cc_state) | |
476d95bd | 1503 | kfree_rcu(cc_state, rcu); |
77241056 MM |
1504 | } |
1505 | ||
1506 | free_credit_return(dd); | |
1507 | ||
d295dbeb MR |
1508 | /* |
1509 | * Free any resources still in use (usually just kernel contexts) | |
1510 | * at unload; we do for ctxtcnt, because that's what we allocate. | |
1511 | */ | |
1512 | for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) { | |
1513 | struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; | |
77241056 | 1514 | |
77241056 | 1515 | if (rcd) { |
838b6fd2 | 1516 | hfi1_free_ctxt_rcv_groups(rcd); |
d295dbeb | 1517 | hfi1_free_ctxt(rcd); |
77241056 MM |
1518 | } |
1519 | } | |
d295dbeb MR |
1520 | |
1521 | kfree(dd->rcd); | |
1522 | dd->rcd = NULL; | |
1523 | ||
35f6befc | 1524 | free_pio_map(dd); |
77241056 MM |
1525 | /* must follow rcv context free - need to remove rcv's hooks */ |
1526 | for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++) | |
1527 | sc_free(dd->send_contexts[ctxt].sc); | |
1528 | dd->num_send_contexts = 0; | |
1529 | kfree(dd->send_contexts); | |
1530 | dd->send_contexts = NULL; | |
79d0c088 JJ |
1531 | kfree(dd->hw_to_sw); |
1532 | dd->hw_to_sw = NULL; | |
77241056 MM |
1533 | kfree(dd->boardname); |
1534 | vfree(dd->events); | |
1535 | vfree(dd->status); | |
77241056 MM |
1536 | } |
1537 | ||
/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 *
 * The last step, hfi1_free_devdata(), releases @dd itself, so @dd must
 * not be used once this returns.
 */
static void postinit_cleanup(struct hfi1_devdata *dd)
{
	hfi1_start_cleanup(dd);
	hfi1_comp_vectors_clean_up(dd);
	hfi1_dev_affinity_clean_up(dd);

	/* PCI-side teardown comes before the device data is freed */
	hfi1_pcie_ddcleanup(dd);
	hfi1_pcie_cleanup(dd->pcidev);

	cleanup_device_data(dd);

	hfi1_free_devdata(dd);
}
1555 | ||
1556 | static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) | |
1557 | { | |
1558 | int ret = 0, j, pidx, initfail; | |
83fb4af6 | 1559 | struct hfi1_devdata *dd; |
e8597eb0 | 1560 | struct hfi1_pportdata *ppd; |
77241056 MM |
1561 | |
1562 | /* First, lock the non-writable module parameters */ | |
1563 | HFI1_CAP_LOCK(); | |
1564 | ||
5d6f08af TS |
1565 | /* Validate dev ids */ |
1566 | if (!(ent->device == PCI_DEVICE_ID_INTEL0 || | |
1567 | ent->device == PCI_DEVICE_ID_INTEL1)) { | |
57f97e96 MR |
1568 | dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", |
1569 | ent->device); | |
5d6f08af TS |
1570 | ret = -ENODEV; |
1571 | goto bail; | |
1572 | } | |
1573 | ||
57f97e96 MR |
1574 | /* Allocate the dd so we can get to work */ |
1575 | dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * | |
1576 | sizeof(struct hfi1_pportdata)); | |
1577 | if (IS_ERR(dd)) { | |
1578 | ret = PTR_ERR(dd); | |
1579 | goto bail; | |
1580 | } | |
1581 | ||
77241056 | 1582 | /* Validate some global module parameters */ |
de730f71 | 1583 | ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt); |
11501ab9 | 1584 | if (ret) |
e002dcc0 | 1585 | goto bail; |
11501ab9 | 1586 | |
77241056 MM |
1587 | /* use the encoding function as a sanitization check */ |
1588 | if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { | |
57f97e96 MR |
1589 | dd_dev_err(dd, "Invalid HdrQ Entry size %u\n", |
1590 | hfi1_hdrq_entsize); | |
07859def | 1591 | ret = -EINVAL; |
77241056 MM |
1592 | goto bail; |
1593 | } | |
1594 | ||
1595 | /* The receive eager buffer size must be set before the receive | |
1596 | * contexts are created. | |
1597 | * | |
1598 | * Set the eager buffer size. Validate that it falls in a range | |
1599 | * allowed by the hardware - all powers of 2 between the min and | |
1600 | * max. The maximum valid MTU is within the eager buffer range | |
1601 | * so we do not need to cap the max_mtu by an eager buffer size | |
1602 | * setting. | |
1603 | */ | |
1604 | if (eager_buffer_size) { | |
1605 | if (!is_power_of_2(eager_buffer_size)) | |
1606 | eager_buffer_size = | |
1607 | roundup_pow_of_two(eager_buffer_size); | |
1608 | eager_buffer_size = | |
1609 | clamp_val(eager_buffer_size, | |
1610 | MIN_EAGER_BUFFER * 8, | |
1611 | MAX_EAGER_BUFFER_TOTAL); | |
57f97e96 MR |
1612 | dd_dev_info(dd, "Eager buffer size %u\n", |
1613 | eager_buffer_size); | |
77241056 | 1614 | } else { |
57f97e96 | 1615 | dd_dev_err(dd, "Invalid Eager buffer size of 0\n"); |
77241056 MM |
1616 | ret = -EINVAL; |
1617 | goto bail; | |
1618 | } | |
1619 | ||
1620 | /* restrict value of hfi1_rcvarr_split */ | |
1621 | hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100); | |
1622 | ||
57f97e96 | 1623 | ret = hfi1_pcie_init(dd); |
77241056 MM |
1624 | if (ret) |
1625 | goto bail; | |
1626 | ||
83fb4af6 KB |
1627 | /* |
1628 | * Do device-specific initialization, function table setup, dd | |
1629 | * allocation, etc. | |
1630 | */ | |
57f97e96 MR |
1631 | ret = hfi1_init_dd(dd); |
1632 | if (ret) | |
77241056 MM |
1633 | goto clean_bail; /* error already printed */ |
1634 | ||
1635 | ret = create_workqueues(dd); | |
1636 | if (ret) | |
1637 | goto clean_bail; | |
1638 | ||
1639 | /* do the generic initialization */ | |
1640 | initfail = hfi1_init(dd, 0); | |
1641 | ||
1642 | ret = hfi1_register_ib_device(dd); | |
1643 | ||
1644 | /* | |
1645 | * Now ready for use. this should be cleared whenever we | |
1646 | * detect a reset, or initiate one. If earlier failure, | |
1647 | * we still create devices, so diags, etc. can be used | |
1648 | * to determine cause of problem. | |
1649 | */ | |
ed6f653f | 1650 | if (!initfail && !ret) { |
77241056 | 1651 | dd->flags |= HFI1_INITTED; |
ed6f653f DL |
1652 | /* create debufs files after init and ib register */ |
1653 | hfi1_dbg_ibdev_init(&dd->verbs_dev); | |
1654 | } | |
77241056 MM |
1655 | |
1656 | j = hfi1_device_create(dd); | |
1657 | if (j) | |
1658 | dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); | |
1659 | ||
1660 | if (initfail || ret) { | |
6eb4eb10 | 1661 | msix_clean_up_interrupts(dd); |
77241056 MM |
1662 | stop_timers(dd); |
1663 | flush_workqueue(ib_wq); | |
e8597eb0 | 1664 | for (pidx = 0; pidx < dd->num_pports; ++pidx) { |
77241056 | 1665 | hfi1_quiet_serdes(dd->pport + pidx); |
e8597eb0 HC |
1666 | ppd = dd->pport + pidx; |
1667 | if (ppd->hfi1_wq) { | |
1668 | destroy_workqueue(ppd->hfi1_wq); | |
1669 | ppd->hfi1_wq = NULL; | |
1670 | } | |
71d47008 SS |
1671 | if (ppd->link_wq) { |
1672 | destroy_workqueue(ppd->link_wq); | |
1673 | ppd->link_wq = NULL; | |
1674 | } | |
e8597eb0 | 1675 | } |
77241056 MM |
1676 | if (!j) |
1677 | hfi1_device_remove(dd); | |
1678 | if (!ret) | |
1679 | hfi1_unregister_ib_device(dd); | |
1680 | postinit_cleanup(dd); | |
1681 | if (initfail) | |
1682 | ret = initfail; | |
1683 | goto bail; /* everything already cleaned */ | |
1684 | } | |
1685 | ||
1686 | sdma_start(dd); | |
1687 | ||
1688 | return 0; | |
1689 | ||
1690 | clean_bail: | |
1691 | hfi1_pcie_cleanup(pdev); | |
1692 | bail: | |
1693 | return ret; | |
1694 | } | |
1695 | ||
acd7c8fe TS |
1696 | static void wait_for_clients(struct hfi1_devdata *dd) |
1697 | { | |
1698 | /* | |
1699 | * Remove the device init value and complete the device if there is | |
1700 | * no clients or wait for active clients to finish. | |
1701 | */ | |
a0293eb2 | 1702 | if (refcount_dec_and_test(&dd->user_refcount)) |
acd7c8fe TS |
1703 | complete(&dd->user_comp); |
1704 | ||
1705 | wait_for_completion(&dd->user_comp); | |
1706 | } | |
1707 | ||
77241056 MM |
1708 | static void remove_one(struct pci_dev *pdev) |
1709 | { | |
1710 | struct hfi1_devdata *dd = pci_get_drvdata(pdev); | |
1711 | ||
ed6f653f DL |
1712 | /* close debugfs files before ib unregister */ |
1713 | hfi1_dbg_ibdev_exit(&dd->verbs_dev); | |
acd7c8fe TS |
1714 | |
1715 | /* remove the /dev hfi1 interface */ | |
1716 | hfi1_device_remove(dd); | |
1717 | ||
1718 | /* wait for existing user space clients to finish */ | |
1719 | wait_for_clients(dd); | |
1720 | ||
77241056 MM |
1721 | /* unregister from IB core */ |
1722 | hfi1_unregister_ib_device(dd); | |
1723 | ||
4730f4a6 | 1724 | /* free netdev data */ |
780278c2 | 1725 | hfi1_free_rx(dd); |
d4829ea6 | 1726 | |
77241056 MM |
1727 | /* |
1728 | * Disable the IB link, disable interrupts on the device, | |
1729 | * clear dma engines, etc. | |
1730 | */ | |
1731 | shutdown_device(dd); | |
28b70cd9 | 1732 | destroy_workqueues(dd); |
77241056 MM |
1733 | |
1734 | stop_timers(dd); | |
1735 | ||
1736 | /* wait until all of our (qsfp) queue_work() calls complete */ | |
1737 | flush_workqueue(ib_wq); | |
1738 | ||
77241056 MM |
1739 | postinit_cleanup(dd); |
1740 | } | |
1741 | ||
8d3e7113 AE |
/*
 * PCI shutdown callback: look up the device data attached to @pdev and
 * hand it to shutdown_device().
 */
static void shutdown_one(struct pci_dev *pdev)
{
	shutdown_device(pci_get_drvdata(pdev));
}
1748 | ||
77241056 MM |
1749 | /** |
1750 | * hfi1_create_rcvhdrq - create a receive header queue | |
1751 | * @dd: the hfi1_ib device | |
1752 | * @rcd: the context data | |
1753 | * | |
1754 | * This must be contiguous memory (from an i/o perspective), and must be | |
1755 | * DMA'able (which means for some systems, it will go through an IOMMU, | |
1756 | * or be forced into a low address range). | |
1757 | */ | |
1758 | int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) | |
1759 | { | |
1760 | unsigned amt; | |
77241056 MM |
1761 | |
1762 | if (!rcd->rcvhdrq) { | |
b2578431 | 1763 | amt = rcvhdrq_size(rcd); |
77241056 | 1764 | |
750afb08 LC |
1765 | rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt, |
1766 | &rcd->rcvhdrq_dma, | |
82c310c3 | 1767 | GFP_KERNEL); |
77241056 MM |
1768 | |
1769 | if (!rcd->rcvhdrq) { | |
1770 | dd_dev_err(dd, | |
17fb4f29 JJ |
1771 | "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", |
1772 | amt, rcd->ctxt); | |
77241056 MM |
1773 | goto bail; |
1774 | } | |
1775 | ||
1bc0299d MM |
1776 | if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) || |
1777 | HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) { | |
750afb08 LC |
1778 | rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, |
1779 | PAGE_SIZE, | |
1780 | &rcd->rcvhdrqtailaddr_dma, | |
82c310c3 | 1781 | GFP_KERNEL); |
77241056 MM |
1782 | if (!rcd->rcvhdrtail_kvaddr) |
1783 | goto bail_free; | |
77241056 | 1784 | } |
77241056 | 1785 | } |
46b010d3 | 1786 | |
de730f71 MM |
1787 | set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize, |
1788 | rcd->rcvhdrq_cnt); | |
46b010d3 | 1789 | |
77241056 MM |
1790 | return 0; |
1791 | ||
1792 | bail_free: | |
1793 | dd_dev_err(dd, | |
17fb4f29 JJ |
1794 | "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", |
1795 | rcd->ctxt); | |
77241056 | 1796 | dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, |
60368186 | 1797 | rcd->rcvhdrq_dma); |
77241056 MM |
1798 | rcd->rcvhdrq = NULL; |
1799 | bail: | |
1800 | return -ENOMEM; | |
1801 | } | |
1802 | ||
1803 | /** | |
ae360f41 LR |
1804 | * hfi1_setup_eagerbufs - llocate eager buffers, both kernel and user |
1805 | * contexts. | |
77241056 MM |
1806 | * @rcd: the context we are setting up. |
1807 | * | |
1808 | * Allocate the eager TID buffers and program them into hip. | |
1809 | * They are no longer completely contiguous, we do multiple allocation | |
1810 | * calls. Otherwise we get the OOM code involved, by asking for too | |
1811 | * much per call, with disastrous results on some kernels. | |
1812 | */ | |
1813 | int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) | |
1814 | { | |
1815 | struct hfi1_devdata *dd = rcd->dd; | |
071e4fec | 1816 | u32 max_entries, egrtop, alloced_bytes = 0; |
071e4fec | 1817 | u16 order, idx = 0; |
77241056 MM |
1818 | int ret = 0; |
1819 | u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); | |
1820 | ||
77241056 MM |
1821 | /* |
1822 | * The minimum size of the eager buffers is a groups of MTU-sized | |
1823 | * buffers. | |
1824 | * The global eager_buffer_size parameter is checked against the | |
1825 | * theoretical lower limit of the value. Here, we check against the | |
1826 | * MTU. | |
1827 | */ | |
1828 | if (rcd->egrbufs.size < (round_mtu * dd->rcv_entries.group_size)) | |
1829 | rcd->egrbufs.size = round_mtu * dd->rcv_entries.group_size; | |
1830 | /* | |
1831 | * If using one-pkt-per-egr-buffer, lower the eager buffer | |
1832 | * size to the max MTU (page-aligned). | |
1833 | */ | |
1834 | if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) | |
1835 | rcd->egrbufs.rcvtid_size = round_mtu; | |
1836 | ||
1837 | /* | |
1838 | * Eager buffers sizes of 1MB or less require smaller TID sizes | |
1839 | * to satisfy the "multiple of 8 RcvArray entries" requirement. | |
1840 | */ | |
1841 | if (rcd->egrbufs.size <= (1 << 20)) | |
1842 | rcd->egrbufs.rcvtid_size = max((unsigned long)round_mtu, | |
1843 | rounddown_pow_of_two(rcd->egrbufs.size / 8)); | |
1844 | ||
1845 | while (alloced_bytes < rcd->egrbufs.size && | |
1846 | rcd->egrbufs.alloced < rcd->egrbufs.count) { | |
1847 | rcd->egrbufs.buffers[idx].addr = | |
750afb08 LC |
1848 | dma_alloc_coherent(&dd->pcidev->dev, |
1849 | rcd->egrbufs.rcvtid_size, | |
1850 | &rcd->egrbufs.buffers[idx].dma, | |
82c310c3 | 1851 | GFP_KERNEL); |
77241056 MM |
1852 | if (rcd->egrbufs.buffers[idx].addr) { |
1853 | rcd->egrbufs.buffers[idx].len = | |
1854 | rcd->egrbufs.rcvtid_size; | |
1855 | rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr = | |
1856 | rcd->egrbufs.buffers[idx].addr; | |
60368186 TK |
1857 | rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma = |
1858 | rcd->egrbufs.buffers[idx].dma; | |
77241056 MM |
1859 | rcd->egrbufs.alloced++; |
1860 | alloced_bytes += rcd->egrbufs.rcvtid_size; | |
1861 | idx++; | |
1862 | } else { | |
1863 | u32 new_size, i, j; | |
1864 | u64 offset = 0; | |
1865 | ||
1866 | /* | |
1867 | * Fail the eager buffer allocation if: | |
1868 | * - we are already using the lowest acceptable size | |
1869 | * - we are using one-pkt-per-egr-buffer (this implies | |
1870 | * that we are accepting only one size) | |
1871 | */ | |
1872 | if (rcd->egrbufs.rcvtid_size == round_mtu || | |
1873 | !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { | |
1874 | dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", | |
17fb4f29 | 1875 | rcd->ctxt); |
94679061 | 1876 | ret = -ENOMEM; |
77241056 MM |
1877 | goto bail_rcvegrbuf_phys; |
1878 | } | |
1879 | ||
1880 | new_size = rcd->egrbufs.rcvtid_size / 2; | |
1881 | ||
1882 | /* | |
1883 | * If the first attempt to allocate memory failed, don't | |
1884 | * fail everything but continue with the next lower | |
1885 | * size. | |
1886 | */ | |
1887 | if (idx == 0) { | |
1888 | rcd->egrbufs.rcvtid_size = new_size; | |
1889 | continue; | |
1890 | } | |
1891 | ||
1892 | /* | |
1893 | * Re-partition already allocated buffers to a smaller | |
1894 | * size. | |
1895 | */ | |
1896 | rcd->egrbufs.alloced = 0; | |
1897 | for (i = 0, j = 0, offset = 0; j < idx; i++) { | |
1898 | if (i >= rcd->egrbufs.count) | |
1899 | break; | |
60368186 TK |
1900 | rcd->egrbufs.rcvtids[i].dma = |
1901 | rcd->egrbufs.buffers[j].dma + offset; | |
77241056 MM |
1902 | rcd->egrbufs.rcvtids[i].addr = |
1903 | rcd->egrbufs.buffers[j].addr + offset; | |
1904 | rcd->egrbufs.alloced++; | |
60368186 | 1905 | if ((rcd->egrbufs.buffers[j].dma + offset + |
77241056 | 1906 | new_size) == |
60368186 | 1907 | (rcd->egrbufs.buffers[j].dma + |
77241056 MM |
1908 | rcd->egrbufs.buffers[j].len)) { |
1909 | j++; | |
1910 | offset = 0; | |
e490974e | 1911 | } else { |
77241056 | 1912 | offset += new_size; |
e490974e | 1913 | } |
77241056 MM |
1914 | } |
1915 | rcd->egrbufs.rcvtid_size = new_size; | |
1916 | } | |
1917 | } | |
1918 | rcd->egrbufs.numbufs = idx; | |
1919 | rcd->egrbufs.size = alloced_bytes; | |
1920 | ||
6c63e423 | 1921 | hfi1_cdbg(PROC, |
d2590edc | 1922 | "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB", |
23002d5b GH |
1923 | rcd->ctxt, rcd->egrbufs.alloced, |
1924 | rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); | |
6c63e423 | 1925 | |
77241056 MM |
1926 | /* |
1927 | * Set the contexts rcv array head update threshold to the closest | |
1928 | * power of 2 (so we can use a mask instead of modulo) below half | |
1929 | * the allocated entries. | |
1930 | */ | |
1931 | rcd->egrbufs.threshold = | |
1932 | rounddown_pow_of_two(rcd->egrbufs.alloced / 2); | |
1933 | /* | |
1934 | * Compute the expected RcvArray entry base. This is done after | |
1935 | * allocating the eager buffers in order to maximize the | |
1936 | * expected RcvArray entries for the context. | |
1937 | */ | |
1938 | max_entries = rcd->rcv_array_groups * dd->rcv_entries.group_size; | |
1939 | egrtop = roundup(rcd->egrbufs.alloced, dd->rcv_entries.group_size); | |
1940 | rcd->expected_count = max_entries - egrtop; | |
1941 | if (rcd->expected_count > MAX_TID_PAIR_ENTRIES * 2) | |
1942 | rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2; | |
1943 | ||
1944 | rcd->expected_base = rcd->eager_base + egrtop; | |
d2590edc | 1945 | hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u", |
6c63e423 SS |
1946 | rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count, |
1947 | rcd->eager_base, rcd->expected_base); | |
77241056 MM |
1948 | |
1949 | if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) { | |
6c63e423 | 1950 | hfi1_cdbg(PROC, |
d2590edc | 1951 | "ctxt%u: current Eager buffer size is invalid %u", |
6c63e423 | 1952 | rcd->ctxt, rcd->egrbufs.rcvtid_size); |
77241056 | 1953 | ret = -EINVAL; |
62239fc6 | 1954 | goto bail_rcvegrbuf_phys; |
77241056 MM |
1955 | } |
1956 | ||
1957 | for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { | |
1958 | hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, | |
60368186 | 1959 | rcd->egrbufs.rcvtids[idx].dma, order); |
77241056 MM |
1960 | cond_resched(); |
1961 | } | |
62239fc6 MR |
1962 | |
1963 | return 0; | |
77241056 MM |
1964 | |
1965 | bail_rcvegrbuf_phys: | |
1966 | for (idx = 0; idx < rcd->egrbufs.alloced && | |
17fb4f29 | 1967 | rcd->egrbufs.buffers[idx].addr; |
77241056 MM |
1968 | idx++) { |
1969 | dma_free_coherent(&dd->pcidev->dev, | |
1970 | rcd->egrbufs.buffers[idx].len, | |
1971 | rcd->egrbufs.buffers[idx].addr, | |
60368186 | 1972 | rcd->egrbufs.buffers[idx].dma); |
77241056 | 1973 | rcd->egrbufs.buffers[idx].addr = NULL; |
60368186 | 1974 | rcd->egrbufs.buffers[idx].dma = 0; |
77241056 MM |
1975 | rcd->egrbufs.buffers[idx].len = 0; |
1976 | } | |
62239fc6 | 1977 | |
77241056 MM |
1978 | return ret; |
1979 | } |