Commit | Line | Data |
---|---|---|
2a055eb7 | 1 | /* |
fe314195 | 2 | * Copyright(c) 2016 Intel Corporation. |
2a055eb7 DD |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or modify | |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
20 | * Redistribution and use in source and binary forms, with or without | |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | ||
48 | #include <linux/slab.h> | |
7b1e2099 DD |
49 | #include <linux/vmalloc.h> |
50 | #include <rdma/ib_umem.h> | |
51 | #include <rdma/rdma_vt.h> | |
52 | #include "vt.h" | |
2a055eb7 DD |
53 | #include "mr.h" |
54 | ||
/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
	unsigned lk_tab_size;
	int i;

	/*
	 * The top hfi1_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	if (!lkey_table_size)
		return -EINVAL;

	spin_lock_init(&rdi->lkey_table.lock);

	/* ensure generation is at least 4 bits */
	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
		lkey_table_size = rdi->dparms.lkey_table_size;
	}
	rdi->lkey_table.max = 1 << lkey_table_size;
	/* one RCU-protected slot per possible table index */
	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
			       vmalloc_node(lk_tab_size, rdi->dparms.node);
	if (!rdi->lkey_table.table)
		return -ENOMEM;

	/* no readers yet, so plain RCU_INIT_POINTER is sufficient */
	RCU_INIT_POINTER(rdi->dma_mr, NULL);
	for (i = 0; i < rdi->lkey_table.max; i++)
		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

	return 0;
}
99 | ||
/**
 * rvt_mr_exit - clean up MR
 * @rdi: rvt dev structure
 *
 * called when drivers have unregistered or perhaps failed to register with us
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
	/* all regions should have been deregistered by now */
	if (rdi->dma_mr)
		rvt_pr_err(rdi, "DMA MR not null!\n");

	vfree(rdi->lkey_table.table);
}
113 | ||
114 | static void rvt_deinit_mregion(struct rvt_mregion *mr) | |
115 | { | |
116 | int i = mr->mapsz; | |
117 | ||
118 | mr->mapsz = 0; | |
119 | while (i) | |
120 | kfree(mr->map[--i]); | |
121 | } | |
122 | ||
/*
 * rvt_init_mregion - initialize the common part of a memory region
 * @mr: region to initialize (map pointer array must already exist)
 * @pd: protection domain that owns the region
 * @count: number of segments the region must be able to hold
 *
 * Allocates one map block per RVT_SEGSZ segments on the device's NUMA
 * node and seeds the refcount/completion used during teardown.
 *
 * Return: 0 on success, -ENOMEM on allocation failure (any partially
 * allocated map blocks are freed before returning).
 */
static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
			    int count)
{
	int m, i = 0;
	struct rvt_dev_info *dev = ib_to_rvt(pd->device);

	mr->mapsz = 0;
	/* round up: number of map blocks needed for count segments */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	for (; i < m; i++) {
		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
					  dev->dparms.node);
		if (!mr->map[i]) {
			rvt_deinit_mregion(mr);
			return -ENOMEM;
		}
		mr->mapsz++;
	}
	init_completion(&mr->comp);
	/* count returning the ptr to user */
	atomic_set(&mr->refcount, 1);
	atomic_set(&mr->lkey_invalid, 0);
	mr->pd = pd;
	mr->max_segs = count;
	return 0;
}
148 | ||
/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field mr for non-dma regions.
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;

	/* hold a reference on behalf of the published table entry */
	rvt_get_mr(mr);
	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct rvt_mregion *tmr;

		tmr = rcu_access_pointer(dev->dma_mr);
		if (!tmr) {
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
		} else {
			/* a DMA region is already published; drop our ref */
			rvt_put_mr(mr);
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (!rcu_access_pointer(rkt->table[r]))
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n)
			goto bail;	/* wrapped around: table is full */
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
	 */
	rkt->gen++;
	/*
	 * bits are capped to ensure enough bits for generation number
	 */
	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	/* publish for RCU readers (rvt_lkey_ok/rvt_rkey_ok) */
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	rvt_put_mr(mr);
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}
225 | ||
/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 *
 * Unpublishes @mr from the lkey table (or the device dma_mr slot for
 * lkey 0).  If the region was published, waits for in-flight RCU
 * readers before dropping the table's reference.
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	int freed = 0;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!mr->lkey_published)
		goto out;
	if (lkey == 0) {
		/* lkey 0 is the restricted DMA region slot */
		RCU_INIT_POINTER(dev->dma_mr, NULL);
	} else {
		/* table index is the top lkey_table_size bits of the lkey */
		r = lkey >> (32 - dev->dparms.lkey_table_size);
		RCU_INIT_POINTER(rkt->table[r], NULL);
	}
	mr->lkey_published = 0;
	freed++;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
	if (freed) {
		/* wait out readers that may still hold the old pointer */
		synchronize_rcu();
		rvt_put_mr(mr);
	}
}
257 | ||
/*
 * __rvt_alloc_mr - allocate an rvt_mr with room for @count segments
 * @count: number of segments the region must hold
 * @pd: protection domain for the region
 *
 * Allocates the struct (with its trailing map pointer array),
 * initializes the embedded mregion, and publishes a fresh lkey/rkey.
 *
 * Return: the new mr on success, otherwise an ERR_PTR.
 */
static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
	struct rvt_mr *mr;
	int rval = -ENOMEM;
	int m;

	/* Allocate struct plus pointers to first level page tables. */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
	if (!mr)
		goto bail;

	rval = rvt_init_mregion(&mr->mr, pd, count);
	if (rval)
		goto bail;
	/*
	 * ib_reg_phys_mr() will initialize mr->ibmr except for
	 * lkey and rkey.
	 */
	rval = rvt_alloc_lkey(&mr->mr, 0);
	if (rval)
		goto bail_mregion;
	mr->ibmr.lkey = mr->mr.lkey;
	mr->ibmr.rkey = mr->mr.lkey;
done:
	return mr;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	mr = ERR_PTR(rval);
	goto done;
}
292 | ||
293 | static void __rvt_free_mr(struct rvt_mr *mr) | |
294 | { | |
295 | rvt_deinit_mregion(&mr->mr); | |
296 | rvt_free_lkey(&mr->mr); | |
e4618d40 | 297 | kfree(mr); |
7b1e2099 DD |
298 | } |
299 | ||
/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the
 * struct ib_dma_mapping_ops functions (see dma.c).
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct rvt_mr *mr;
	struct ib_mr *ret;
	int rval;

	/* the unrestricted DMA key is kernel-only */
	if (ibpd_to_rvtpd(pd)->user)
		return ERR_PTR(-EPERM);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	/* count == 0: the DMA region has no segment maps */
	rval = rvt_init_mregion(&mr->mr, pd, 0);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail;
	}

	/* dma_region = 1 claims the reserved lkey 0 slot */
	rval = rvt_alloc_lkey(&mr->mr, 1);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail_mregion;
	}

	mr->mr.access_flags = acc;
	ret = &mr->ibmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	goto done;
}
347 | ||
/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: associated virtual address (iova) for the region
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			      u64 virt_addr, int mr_access_flags,
			      struct ib_udata *udata)
{
	struct rvt_mr *mr;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int n, m, entry;
	struct ib_mr *ret;

	if (length == 0)
		return ERR_PTR(-EINVAL);

	/* pin the user pages for the lifetime of the region */
	umem = ib_umem_get(pd->uobject->context, start, length,
			   mr_access_flags, 0);
	if (IS_ERR(umem))
		return (void *)umem;

	n = umem->nmap;

	mr = __rvt_alloc_mr(n, pd);
	if (IS_ERR(mr)) {
		ret = (struct ib_mr *)mr;
		goto bail_umem;
	}

	mr->mr.user_base = start;
	mr->mr.iova = virt_addr;
	mr->mr.length = length;
	mr->mr.offset = ib_umem_offset(umem);
	mr->mr.access_flags = mr_access_flags;
	mr->umem = umem;

	/* page_shift enables the fast segment lookup in rvt_lkey_ok() */
	if (is_power_of_2(umem->page_size))
		mr->mr.page_shift = ilog2(umem->page_size);
	m = 0;
	n = 0;
	/* record the kernel virtual address of each pinned page */
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr;

		vaddr = page_address(sg_page(sg));
		if (!vaddr) {
			ret = ERR_PTR(-EINVAL);
			goto bail_inval;
		}
		mr->mr.map[m]->segs[n].vaddr = vaddr;
		mr->mr.map[m]->segs[n].length = umem->page_size;
		n++;
		if (n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	return &mr->ibmr;

bail_inval:
	__rvt_free_mr(mr);

bail_umem:
	ib_umem_release(umem);

	return ret;
}
421 | ||
/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 *
 * Returns 0 on success.
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&mr->mr);

	rvt_put_mr(&mr->mr); /* will set completion if last */
	/* wait up to 5s for outstanding references to drain */
	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_pr_err(rdi,
			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
		/* re-take our reference; caller may retry the dereg */
		rvt_get_mr(&mr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&mr->mr);
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
out:
	return ret;
}
458 | ||
459 | /** | |
460 | * rvt_alloc_mr - Allocate a memory region usable with the | |
461 | * @pd: protection domain for this memory region | |
462 | * @mr_type: mem region type | |
463 | * @max_num_sg: Max number of segments allowed | |
464 | * | |
90793f71 | 465 | * Return: the memory region on success, otherwise return an errno. |
2a055eb7 DD |
466 | */ |
467 | struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, | |
468 | enum ib_mr_type mr_type, | |
469 | u32 max_num_sg) | |
470 | { | |
7b1e2099 DD |
471 | struct rvt_mr *mr; |
472 | ||
473 | if (mr_type != IB_MR_TYPE_MEM_REG) | |
474 | return ERR_PTR(-EINVAL); | |
475 | ||
476 | mr = __rvt_alloc_mr(max_num_sg, pd); | |
477 | if (IS_ERR(mr)) | |
478 | return (struct ib_mr *)mr; | |
479 | ||
480 | return &mr->ibmr; | |
2a055eb7 DD |
481 | } |
482 | ||
/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success, -ENOMEM when the region is already full
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	u32 ps = 1 << mr->mr.page_shift;
	/* pages recorded so far: length grows by ps per accepted page */
	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
	int m, n;

	if (unlikely(mapped_segs == mr->mr.max_segs))
		return -ENOMEM;

	/* first page establishes the region's base address / iova */
	if (mr->mr.length == 0) {
		mr->mr.user_base = addr;
		mr->mr.iova = addr;
	}

	/* locate the map block (m) and segment slot (n) for this page */
	m = mapped_segs / RVT_SEGSZ;
	n = mapped_segs % RVT_SEGSZ;
	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
	mr->mr.map[m]->segs[n].length = ps;
	mr->mr.length += ps;

	return 0;
}
513 | ||
514 | /** | |
515 | * rvt_map_mr_sg - map sg list and set it the memory region | |
516 | * @ibmr: memory region | |
517 | * @sg: dma mapped scatterlist | |
518 | * @sg_nents: number of entries in sg | |
519 | * @sg_offset: offset in bytes into sg | |
520 | * | |
521 | * Return: number of sg elements mapped to the memory region | |
522 | */ | |
523 | int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, | |
524 | int sg_nents, unsigned int *sg_offset) | |
525 | { | |
526 | struct rvt_mr *mr = to_imr(ibmr); | |
527 | ||
528 | mr->mr.length = 0; | |
529 | mr->mr.page_shift = PAGE_SHIFT; | |
530 | return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, | |
531 | rvt_set_page); | |
532 | } | |
533 | ||
/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access)
{
	struct rvt_mr *mr = to_imr(ibmr);

	/* the QP and the MR must belong to the same PD */
	if (qp->ibqp.pd != mr->mr.pd)
		return -EACCES;

	/* not applicable to dma MR or user MR */
	if (!mr->mr.lkey || mr->umem)
		return -EINVAL;

	/* only the low 8 "user-owned" bits of the key may change */
	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
		return -EINVAL;

	ibmr->lkey = key;
	ibmr->rkey = key;
	mr->mr.lkey = key;
	mr->mr.access_flags = access;
	/* re-arm a key previously disabled by rvt_invalidate_rkey() */
	atomic_set(&mr->mr.lkey_invalid, 0);

	return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);
567 | ||
/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;

	/* rkey 0 is the reserved DMA key and can never be invalidated */
	if (rkey == 0)
		return -EINVAL;

	rcu_read_lock();
	/* table index is the top lkey_table_size bits of the rkey */
	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	/* rvt_lkey_ok()/rvt_rkey_ok() will now reject this key */
	atomic_set(&mr->lkey_invalid, 1);
	rcu_read_unlock();
	return 0;

bail:
	rcu_read_unlock();
	return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);
599 | ||
/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
			     struct ib_fmr_attr *fmr_attr)
{
	struct rvt_fmr *fmr;
	int m;
	struct ib_fmr *ret;
	int rval = -ENOMEM;

	/* Allocate struct plus pointers to first level page tables. */
	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
	if (!fmr)
		goto bail;

	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
	if (rval)
		goto bail;

	/*
	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
	 * rkey.
	 */
	rval = rvt_alloc_lkey(&fmr->mr, 0);
	if (rval)
		goto bail_mregion;
	fmr->ibfmr.rkey = fmr->mr.lkey;
	fmr->ibfmr.lkey = fmr->mr.lkey;
	/*
	 * Resources are allocated but no valid mapping (RKEY can't be
	 * used).
	 */
	fmr->mr.access_flags = mr_access_flags;
	fmr->mr.max_segs = fmr_attr->max_pages;
	fmr->mr.page_shift = fmr_attr->page_shift;

	ret = &fmr->ibfmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&fmr->mr);
bail:
	kfree(fmr);
	ret = ERR_PTR(rval);
	goto done;
}
654 | ||
/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success
 */

int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		     int list_len, u64 iova)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	int m, n, i;
	u32 ps;
	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

	/*
	 * refcount > 2 means someone beyond the table entry and the
	 * creator still holds the region; remapping it now is unsafe.
	 */
	i = atomic_read(&fmr->mr.refcount);
	if (i > 2)
		return -EBUSY;

	if (list_len > fmr->mr.max_segs)
		return -EINVAL;

	rkt = &rdi->lkey_table;
	/* update the mapping under the lkey lock so lookups stay coherent */
	spin_lock_irqsave(&rkt->lock, flags);
	fmr->mr.user_base = iova;
	fmr->mr.iova = iova;
	ps = 1 << fmr->mr.page_shift;
	fmr->mr.length = list_len * ps;
	m = 0;
	n = 0;
	for (i = 0; i < list_len; i++) {
		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
		fmr->mr.map[m]->segs[n].length = ps;
		if (++n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	spin_unlock_irqrestore(&rkt->lock, flags);
	return 0;
}
703 | ||
704 | /** | |
705 | * rvt_unmap_fmr - unmap fast memory regions | |
706 | * @fmr_list: the list of fast memory regions to unmap | |
707 | * | |
90793f71 | 708 | * Return: 0 on success. |
2a055eb7 DD |
709 | */ |
710 | int rvt_unmap_fmr(struct list_head *fmr_list) | |
711 | { | |
7b1e2099 DD |
712 | struct rvt_fmr *fmr; |
713 | struct rvt_lkey_table *rkt; | |
714 | unsigned long flags; | |
715 | struct rvt_dev_info *rdi; | |
716 | ||
717 | list_for_each_entry(fmr, fmr_list, ibfmr.list) { | |
718 | rdi = ib_to_rvt(fmr->ibfmr.device); | |
719 | rkt = &rdi->lkey_table; | |
720 | spin_lock_irqsave(&rkt->lock, flags); | |
721 | fmr->mr.user_base = 0; | |
722 | fmr->mr.iova = 0; | |
723 | fmr->mr.length = 0; | |
724 | spin_unlock_irqrestore(&rkt->lock, flags); | |
725 | } | |
726 | return 0; | |
2a055eb7 DD |
727 | } |
728 | ||
729 | /** | |
730 | * rvt_dealloc_fmr - deallocate a fast memory region | |
731 | * @ibfmr: the fast memory region to deallocate | |
732 | * | |
90793f71 | 733 | * Return: 0 on success. |
2a055eb7 DD |
734 | */ |
735 | int rvt_dealloc_fmr(struct ib_fmr *ibfmr) | |
736 | { | |
7b1e2099 DD |
737 | struct rvt_fmr *fmr = to_ifmr(ibfmr); |
738 | int ret = 0; | |
739 | unsigned long timeout; | |
740 | ||
741 | rvt_free_lkey(&fmr->mr); | |
742 | rvt_put_mr(&fmr->mr); /* will set completion if last */ | |
743 | timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ); | |
744 | if (!timeout) { | |
745 | rvt_get_mr(&fmr->mr); | |
746 | ret = -EBUSY; | |
747 | goto out; | |
748 | } | |
749 | rvt_deinit_mregion(&fmr->mr); | |
750 | kfree(fmr); | |
751 | out: | |
752 | return ret; | |
753 | } | |
754 | ||
/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * increments the reference count upon success
 *
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;
	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

	/*
	 * We use LKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
	 */
	rcu_read_lock();
	if (sge->lkey == 0) {
		/* userspace may not use the unrestricted key */
		if (pd->user)
			goto bail;
		mr = rcu_dereference(dev->dma_mr);
		if (!mr)
			goto bail;
		/* pin the region before leaving the RCU section */
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		isge->mr = mr;
		isge->vaddr = (void *)sge->addr;
		isge->length = sge->length;
		isge->sge_length = sge->length;
		isge->m = 0;
		isge->n = 0;
		goto ok;
	}
	/* table index is the top lkey_table_size bits of the lkey */
	mr = rcu_dereference(
		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
		goto bail;

	/* range and permission check against the registered region */
	off = sge->addr - mr->user_base;
	if (unlikely(sge->addr < mr->user_base ||
		     off + sge->length > mr->length ||
		     (mr->access_flags & acc) != acc))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		/* variable segment sizes: walk segments to locate off */
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	isge->mr = mr;
	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
	isge->length = mr->map[m]->segs[n].length - off;
	isge->sge_length = sge->length;
	isge->m = m;
	isge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);
853 | ||
/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * increments the reference count upon success
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use RKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
	 */
	rcu_read_lock();
	if (rkey == 0) {
		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

		/* userspace may not use the unrestricted key */
		if (pd->user)
			goto bail;
		mr = rcu_dereference(rdi->dma_mr);
		if (!mr)
			goto bail;
		/* pin the region before leaving the RCU section */
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		sge->mr = mr;
		sge->vaddr = (void *)vaddr;
		sge->length = len;
		sge->sge_length = len;
		sge->m = 0;
		sge->n = 0;
		goto ok;
	}

	/* table index is the top lkey_table_size bits of the rkey */
	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	/* range and permission check against the registered region */
	off = vaddr - mr->iova;
	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
		     (mr->access_flags & acc) == 0))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		/* variable segment sizes: walk segments to locate off */
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	sge->mr = mr;
	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
	sge->length = mr->map[m]->segs[n].length - off;
	sge->sge_length = len;
	sge->m = m;
	sge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);