Commit | Line | Data |
---|---|---|
f931551b RC |
1 | /* |
2 | * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved. | |
3 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the | |
9 | * OpenIB.org BSD license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or | |
12 | * without modification, are permitted provided that the following | |
13 | * conditions are met: | |
14 | * | |
15 | * - Redistributions of source code must retain the above | |
16 | * copyright notice, this list of conditions and the following | |
17 | * disclaimer. | |
18 | * | |
19 | * - Redistributions in binary form must reproduce the above | |
20 | * copyright notice, this list of conditions and the following | |
21 | * disclaimer in the documentation and/or other materials | |
22 | * provided with the distribution. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
31 | * SOFTWARE. | |
32 | */ | |
33 | ||
34 | #include "qib.h" | |
35 | ||
36 | /** | |
37 | * qib_alloc_lkey - allocate an lkey | |
f931551b | 38 | * @mr: memory region that this lkey protects |
6a82649f MM |
39 | * @dma_region: 0->normal key, 1->restricted DMA key |
40 | * | |
41 | * Returns 0 if successful, otherwise returns -errno. | |
42 | * | |
8aac4cc3 | 43 | * Increments mr reference count as required. |
6a82649f MM |
44 | * |
45 | * Sets the lkey field mr for non-dma regions. | |
f931551b | 46 | * |
f931551b RC |
47 | */ |
48 | ||
6a82649f | 49 | int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) |
f931551b RC |
50 | { |
51 | unsigned long flags; | |
52 | u32 r; | |
53 | u32 n; | |
6a82649f MM |
54 | int ret = 0; |
55 | struct qib_ibdev *dev = to_idev(mr->pd->device); | |
56 | struct qib_lkey_table *rkt = &dev->lk_table; | |
f931551b RC |
57 | |
58 | spin_lock_irqsave(&rkt->lock, flags); | |
59 | ||
6a82649f MM |
60 | /* special case for dma_mr lkey == 0 */ |
61 | if (dma_region) { | |
8aac4cc3 MM |
62 | struct qib_mregion *tmr; |
63 | ||
f3bdf344 | 64 | tmr = rcu_access_pointer(dev->dma_mr); |
8aac4cc3 | 65 | if (!tmr) { |
6a82649f | 66 | qib_get_mr(mr); |
8aac4cc3 | 67 | rcu_assign_pointer(dev->dma_mr, mr); |
6a82649f MM |
68 | mr->lkey_published = 1; |
69 | } | |
70 | goto success; | |
71 | } | |
72 | ||
f931551b RC |
73 | /* Find the next available LKEY */ |
74 | r = rkt->next; | |
75 | n = r; | |
76 | for (;;) { | |
77 | if (rkt->table[r] == NULL) | |
78 | break; | |
79 | r = (r + 1) & (rkt->max - 1); | |
6a82649f | 80 | if (r == n) |
f931551b | 81 | goto bail; |
f931551b RC |
82 | } |
83 | rkt->next = (r + 1) & (rkt->max - 1); | |
84 | /* | |
85 | * Make sure lkey is never zero which is reserved to indicate an | |
86 | * unrestricted LKEY. | |
87 | */ | |
88 | rkt->gen++; | |
89 | mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | | |
90 | ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) | |
91 | << 8); | |
92 | if (mr->lkey == 0) { | |
93 | mr->lkey |= 1 << 8; | |
94 | rkt->gen++; | |
95 | } | |
6a82649f | 96 | qib_get_mr(mr); |
8aac4cc3 | 97 | rcu_assign_pointer(rkt->table[r], mr); |
6a82649f MM |
98 | mr->lkey_published = 1; |
99 | success: | |
f931551b | 100 | spin_unlock_irqrestore(&rkt->lock, flags); |
6a82649f | 101 | out: |
f931551b | 102 | return ret; |
6a82649f MM |
103 | bail: |
104 | spin_unlock_irqrestore(&rkt->lock, flags); | |
105 | ret = -ENOMEM; | |
106 | goto out; | |
f931551b RC |
107 | } |
108 | ||
109 | /** | |
110 | * qib_free_lkey - free an lkey | |
6a82649f | 111 | * @mr: mr to free from tables |
f931551b | 112 | */ |
6a82649f | 113 | void qib_free_lkey(struct qib_mregion *mr) |
f931551b RC |
114 | { |
115 | unsigned long flags; | |
116 | u32 lkey = mr->lkey; | |
117 | u32 r; | |
6a82649f MM |
118 | struct qib_ibdev *dev = to_idev(mr->pd->device); |
119 | struct qib_lkey_table *rkt = &dev->lk_table; | |
120 | ||
121 | spin_lock_irqsave(&rkt->lock, flags); | |
122 | if (!mr->lkey_published) | |
123 | goto out; | |
8aac4cc3 MM |
124 | if (lkey == 0) |
125 | rcu_assign_pointer(dev->dma_mr, NULL); | |
126 | else { | |
f931551b | 127 | r = lkey >> (32 - ib_qib_lkey_table_size); |
8aac4cc3 | 128 | rcu_assign_pointer(rkt->table[r], NULL); |
f931551b | 129 | } |
8aac4cc3 MM |
130 | qib_put_mr(mr); |
131 | mr->lkey_published = 0; | |
6a82649f | 132 | out: |
8aac4cc3 | 133 | spin_unlock_irqrestore(&rkt->lock, flags); |
f931551b RC |
134 | } |
135 | ||
136 | /** | |
137 | * qib_lkey_ok - check IB SGE for validity and initialize | |
138 | * @rkt: table containing lkey to check SGE against | |
8aac4cc3 | 139 | * @pd: protection domain |
f931551b RC |
140 | * @isge: outgoing internal SGE |
141 | * @sge: SGE to check | |
142 | * @acc: access flags | |
143 | * | |
144 | * Return 1 if valid and successful, otherwise returns 0. | |
145 | * | |
8aac4cc3 MM |
146 | * increments the reference count upon success |
147 | * | |
f931551b RC |
148 | * Check the IB SGE for validity and initialize our internal version |
149 | * of it. | |
150 | */ | |
151 | int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, | |
152 | struct qib_sge *isge, struct ib_sge *sge, int acc) | |
153 | { | |
154 | struct qib_mregion *mr; | |
155 | unsigned n, m; | |
156 | size_t off; | |
f931551b RC |
157 | |
158 | /* | |
159 | * We use LKEY == zero for kernel virtual addresses | |
160 | * (see qib_get_dma_mr and qib_dma.c). | |
161 | */ | |
8aac4cc3 | 162 | rcu_read_lock(); |
f931551b RC |
163 | if (sge->lkey == 0) { |
164 | struct qib_ibdev *dev = to_idev(pd->ibpd.device); | |
165 | ||
166 | if (pd->user) | |
167 | goto bail; | |
8aac4cc3 MM |
168 | mr = rcu_dereference(dev->dma_mr); |
169 | if (!mr) | |
170 | goto bail; | |
171 | if (unlikely(!atomic_inc_not_zero(&mr->refcount))) | |
f931551b | 172 | goto bail; |
8aac4cc3 | 173 | rcu_read_unlock(); |
4db62d47 | 174 | |
8aac4cc3 | 175 | isge->mr = mr; |
f931551b RC |
176 | isge->vaddr = (void *) sge->addr; |
177 | isge->length = sge->length; | |
178 | isge->sge_length = sge->length; | |
179 | isge->m = 0; | |
180 | isge->n = 0; | |
181 | goto ok; | |
182 | } | |
8aac4cc3 MM |
183 | mr = rcu_dereference( |
184 | rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); | |
185 | if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) | |
f931551b RC |
186 | goto bail; |
187 | ||
188 | off = sge->addr - mr->user_base; | |
c00aaa1a MM |
189 | if (unlikely(sge->addr < mr->user_base || |
190 | off + sge->length > mr->length || | |
191 | (mr->access_flags & acc) != acc)) | |
4db62d47 | 192 | goto bail; |
8aac4cc3 MM |
193 | if (unlikely(!atomic_inc_not_zero(&mr->refcount))) |
194 | goto bail; | |
195 | rcu_read_unlock(); | |
f931551b RC |
196 | |
197 | off += mr->offset; | |
2a600f14 MM |
198 | if (mr->page_shift) { |
199 | /* | |
200 | page sizes are uniform power of 2 so no loop is necessary | |
201 | entries_spanned_by_off is the number of times the loop below | |
202 | would have executed. | |
203 | */ | |
204 | size_t entries_spanned_by_off; | |
205 | ||
206 | entries_spanned_by_off = off >> mr->page_shift; | |
207 | off -= (entries_spanned_by_off << mr->page_shift); | |
208 | m = entries_spanned_by_off/QIB_SEGSZ; | |
209 | n = entries_spanned_by_off%QIB_SEGSZ; | |
210 | } else { | |
211 | m = 0; | |
212 | n = 0; | |
213 | while (off >= mr->map[m]->segs[n].length) { | |
214 | off -= mr->map[m]->segs[n].length; | |
215 | n++; | |
216 | if (n >= QIB_SEGSZ) { | |
217 | m++; | |
218 | n = 0; | |
219 | } | |
f931551b RC |
220 | } |
221 | } | |
f931551b RC |
222 | isge->mr = mr; |
223 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; | |
224 | isge->length = mr->map[m]->segs[n].length - off; | |
225 | isge->sge_length = sge->length; | |
226 | isge->m = m; | |
227 | isge->n = n; | |
228 | ok: | |
4db62d47 | 229 | return 1; |
f931551b | 230 | bail: |
8aac4cc3 | 231 | rcu_read_unlock(); |
4db62d47 | 232 | return 0; |
f931551b RC |
233 | } |
234 | ||
235 | /** | |
236 | * qib_rkey_ok - check the IB virtual address, length, and RKEY | |
8aac4cc3 MM |
237 | * @qp: qp for validation |
238 | * @sge: SGE state | |
f931551b RC |
239 | * @len: length of data |
240 | * @vaddr: virtual address to place data | |
241 | * @rkey: rkey to check | |
242 | * @acc: access flags | |
243 | * | |
244 | * Return 1 if successful, otherwise 0. | |
8aac4cc3 MM |
245 | * |
246 | * increments the reference count upon success | |
f931551b RC |
247 | */ |
248 | int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, | |
249 | u32 len, u64 vaddr, u32 rkey, int acc) | |
250 | { | |
251 | struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; | |
252 | struct qib_mregion *mr; | |
253 | unsigned n, m; | |
254 | size_t off; | |
f931551b RC |
255 | |
256 | /* | |
257 | * We use RKEY == zero for kernel virtual addresses | |
258 | * (see qib_get_dma_mr and qib_dma.c). | |
259 | */ | |
8aac4cc3 | 260 | rcu_read_lock(); |
f931551b RC |
261 | if (rkey == 0) { |
262 | struct qib_pd *pd = to_ipd(qp->ibqp.pd); | |
263 | struct qib_ibdev *dev = to_idev(pd->ibpd.device); | |
264 | ||
265 | if (pd->user) | |
266 | goto bail; | |
8aac4cc3 MM |
267 | mr = rcu_dereference(dev->dma_mr); |
268 | if (!mr) | |
269 | goto bail; | |
270 | if (unlikely(!atomic_inc_not_zero(&mr->refcount))) | |
f931551b | 271 | goto bail; |
8aac4cc3 | 272 | rcu_read_unlock(); |
4db62d47 | 273 | |
8aac4cc3 | 274 | sge->mr = mr; |
f931551b RC |
275 | sge->vaddr = (void *) vaddr; |
276 | sge->length = len; | |
277 | sge->sge_length = len; | |
278 | sge->m = 0; | |
279 | sge->n = 0; | |
280 | goto ok; | |
281 | } | |
282 | ||
8aac4cc3 MM |
283 | mr = rcu_dereference( |
284 | rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); | |
285 | if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) | |
f931551b RC |
286 | goto bail; |
287 | ||
288 | off = vaddr - mr->iova; | |
289 | if (unlikely(vaddr < mr->iova || off + len > mr->length || | |
290 | (mr->access_flags & acc) == 0)) | |
4db62d47 | 291 | goto bail; |
8aac4cc3 MM |
292 | if (unlikely(!atomic_inc_not_zero(&mr->refcount))) |
293 | goto bail; | |
294 | rcu_read_unlock(); | |
f931551b RC |
295 | |
296 | off += mr->offset; | |
2a600f14 MM |
297 | if (mr->page_shift) { |
298 | /* | |
299 | page sizes are uniform power of 2 so no loop is necessary | |
300 | entries_spanned_by_off is the number of times the loop below | |
301 | would have executed. | |
302 | */ | |
303 | size_t entries_spanned_by_off; | |
304 | ||
305 | entries_spanned_by_off = off >> mr->page_shift; | |
306 | off -= (entries_spanned_by_off << mr->page_shift); | |
307 | m = entries_spanned_by_off/QIB_SEGSZ; | |
308 | n = entries_spanned_by_off%QIB_SEGSZ; | |
309 | } else { | |
310 | m = 0; | |
311 | n = 0; | |
312 | while (off >= mr->map[m]->segs[n].length) { | |
313 | off -= mr->map[m]->segs[n].length; | |
314 | n++; | |
315 | if (n >= QIB_SEGSZ) { | |
316 | m++; | |
317 | n = 0; | |
318 | } | |
f931551b RC |
319 | } |
320 | } | |
f931551b RC |
321 | sge->mr = mr; |
322 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; | |
323 | sge->length = mr->map[m]->segs[n].length - off; | |
324 | sge->sge_length = len; | |
325 | sge->m = m; | |
326 | sge->n = n; | |
327 | ok: | |
4db62d47 | 328 | return 1; |
f931551b | 329 | bail: |
8aac4cc3 | 330 | rcu_read_unlock(); |
4db62d47 | 331 | return 0; |
f931551b RC |
332 | } |
333 | ||
334 | /* | |
335 | * Initialize the memory region specified by the work reqeust. | |
336 | */ | |
337 | int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) | |
338 | { | |
339 | struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; | |
340 | struct qib_pd *pd = to_ipd(qp->ibqp.pd); | |
341 | struct qib_mregion *mr; | |
342 | u32 rkey = wr->wr.fast_reg.rkey; | |
343 | unsigned i, n, m; | |
344 | int ret = -EINVAL; | |
345 | unsigned long flags; | |
346 | u64 *page_list; | |
347 | size_t ps; | |
348 | ||
349 | spin_lock_irqsave(&rkt->lock, flags); | |
350 | if (pd->user || rkey == 0) | |
351 | goto bail; | |
352 | ||
7e230177 MM |
353 | mr = rcu_dereference_protected( |
354 | rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], | |
355 | lockdep_is_held(&rkt->lock)); | |
f931551b RC |
356 | if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) |
357 | goto bail; | |
358 | ||
359 | if (wr->wr.fast_reg.page_list_len > mr->max_segs) | |
360 | goto bail; | |
361 | ||
362 | ps = 1UL << wr->wr.fast_reg.page_shift; | |
363 | if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) | |
364 | goto bail; | |
365 | ||
366 | mr->user_base = wr->wr.fast_reg.iova_start; | |
367 | mr->iova = wr->wr.fast_reg.iova_start; | |
368 | mr->lkey = rkey; | |
369 | mr->length = wr->wr.fast_reg.length; | |
370 | mr->access_flags = wr->wr.fast_reg.access_flags; | |
371 | page_list = wr->wr.fast_reg.page_list->page_list; | |
372 | m = 0; | |
373 | n = 0; | |
374 | for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { | |
375 | mr->map[m]->segs[n].vaddr = (void *) page_list[i]; | |
376 | mr->map[m]->segs[n].length = ps; | |
377 | if (++n == QIB_SEGSZ) { | |
378 | m++; | |
379 | n = 0; | |
380 | } | |
381 | } | |
382 | ||
383 | ret = 0; | |
384 | bail: | |
385 | spin_unlock_irqrestore(&rkt->lock, flags); | |
386 | return ret; | |
387 | } |