Commit | Line | Data |
---|---|---|
77241056 MM |
1 | /* |
2 | * | |
3 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
4 | * redistributing this file, you may do so under either license. | |
5 | * | |
6 | * GPL LICENSE SUMMARY | |
7 | * | |
8 | * Copyright(c) 2015 Intel Corporation. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of version 2 of the GNU General Public License as | |
12 | * published by the Free Software Foundation. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, but | |
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * General Public License for more details. | |
18 | * | |
19 | * BSD LICENSE | |
20 | * | |
21 | * Copyright(c) 2015 Intel Corporation. | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
51 | #include "hfi.h" | |
52 | ||
53 | /** | |
54 | * hfi1_alloc_lkey - allocate an lkey | |
55 | * @mr: memory region that this lkey protects | |
56 | * @dma_region: 0->normal key, 1->restricted DMA key | |
57 | * | |
58 | * Returns 0 if successful, otherwise returns -errno. | |
59 | * | |
60 | * Increments mr reference count as required. | |
61 | * | |
62 | * Sets the lkey field mr for non-dma regions. | |
63 | * | |
64 | */ | |
65 | ||
66 | int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region) | |
67 | { | |
68 | unsigned long flags; | |
69 | u32 r; | |
70 | u32 n; | |
71 | int ret = 0; | |
72 | struct hfi1_ibdev *dev = to_idev(mr->pd->device); | |
73 | struct hfi1_lkey_table *rkt = &dev->lk_table; | |
74 | ||
75 | hfi1_get_mr(mr); | |
76 | spin_lock_irqsave(&rkt->lock, flags); | |
77 | ||
78 | /* special case for dma_mr lkey == 0 */ | |
79 | if (dma_region) { | |
80 | struct hfi1_mregion *tmr; | |
81 | ||
82 | tmr = rcu_access_pointer(dev->dma_mr); | |
83 | if (!tmr) { | |
84 | rcu_assign_pointer(dev->dma_mr, mr); | |
85 | mr->lkey_published = 1; | |
86 | } else { | |
87 | hfi1_put_mr(mr); | |
88 | } | |
89 | goto success; | |
90 | } | |
91 | ||
92 | /* Find the next available LKEY */ | |
93 | r = rkt->next; | |
94 | n = r; | |
95 | for (;;) { | |
96 | if (!rcu_access_pointer(rkt->table[r])) | |
97 | break; | |
98 | r = (r + 1) & (rkt->max - 1); | |
99 | if (r == n) | |
100 | goto bail; | |
101 | } | |
102 | rkt->next = (r + 1) & (rkt->max - 1); | |
103 | /* | |
104 | * Make sure lkey is never zero which is reserved to indicate an | |
105 | * unrestricted LKEY. | |
106 | */ | |
107 | rkt->gen++; | |
108 | /* | |
109 | * bits are capped in verbs.c to ensure enough bits for | |
110 | * generation number | |
111 | */ | |
112 | mr->lkey = (r << (32 - hfi1_lkey_table_size)) | | |
113 | ((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen) | |
114 | << 8); | |
115 | if (mr->lkey == 0) { | |
116 | mr->lkey |= 1 << 8; | |
117 | rkt->gen++; | |
118 | } | |
119 | rcu_assign_pointer(rkt->table[r], mr); | |
120 | mr->lkey_published = 1; | |
121 | success: | |
122 | spin_unlock_irqrestore(&rkt->lock, flags); | |
123 | out: | |
124 | return ret; | |
125 | bail: | |
126 | hfi1_put_mr(mr); | |
127 | spin_unlock_irqrestore(&rkt->lock, flags); | |
128 | ret = -ENOMEM; | |
129 | goto out; | |
130 | } | |
131 | ||
132 | /** | |
133 | * hfi1_free_lkey - free an lkey | |
134 | * @mr: mr to free from tables | |
135 | */ | |
136 | void hfi1_free_lkey(struct hfi1_mregion *mr) | |
137 | { | |
138 | unsigned long flags; | |
139 | u32 lkey = mr->lkey; | |
140 | u32 r; | |
141 | struct hfi1_ibdev *dev = to_idev(mr->pd->device); | |
142 | struct hfi1_lkey_table *rkt = &dev->lk_table; | |
143 | int freed = 0; | |
144 | ||
145 | spin_lock_irqsave(&rkt->lock, flags); | |
146 | if (!mr->lkey_published) | |
147 | goto out; | |
148 | if (lkey == 0) | |
149 | RCU_INIT_POINTER(dev->dma_mr, NULL); | |
150 | else { | |
151 | r = lkey >> (32 - hfi1_lkey_table_size); | |
152 | RCU_INIT_POINTER(rkt->table[r], NULL); | |
153 | } | |
154 | mr->lkey_published = 0; | |
155 | freed++; | |
156 | out: | |
157 | spin_unlock_irqrestore(&rkt->lock, flags); | |
158 | if (freed) { | |
159 | synchronize_rcu(); | |
160 | hfi1_put_mr(mr); | |
161 | } | |
162 | } | |
163 | ||
164 | /** | |
165 | * hfi1_lkey_ok - check IB SGE for validity and initialize | |
166 | * @rkt: table containing lkey to check SGE against | |
167 | * @pd: protection domain | |
168 | * @isge: outgoing internal SGE | |
169 | * @sge: SGE to check | |
170 | * @acc: access flags | |
171 | * | |
172 | * Return 1 if valid and successful, otherwise returns 0. | |
173 | * | |
174 | * increments the reference count upon success | |
175 | * | |
176 | * Check the IB SGE for validity and initialize our internal version | |
177 | * of it. | |
178 | */ | |
179 | int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd, | |
180 | struct hfi1_sge *isge, struct ib_sge *sge, int acc) | |
181 | { | |
182 | struct hfi1_mregion *mr; | |
183 | unsigned n, m; | |
184 | size_t off; | |
185 | ||
186 | /* | |
187 | * We use LKEY == zero for kernel virtual addresses | |
188 | * (see hfi1_get_dma_mr and dma.c). | |
189 | */ | |
190 | rcu_read_lock(); | |
191 | if (sge->lkey == 0) { | |
192 | struct hfi1_ibdev *dev = to_idev(pd->ibpd.device); | |
193 | ||
194 | if (pd->user) | |
195 | goto bail; | |
196 | mr = rcu_dereference(dev->dma_mr); | |
197 | if (!mr) | |
198 | goto bail; | |
199 | atomic_inc(&mr->refcount); | |
200 | rcu_read_unlock(); | |
201 | ||
202 | isge->mr = mr; | |
203 | isge->vaddr = (void *) sge->addr; | |
204 | isge->length = sge->length; | |
205 | isge->sge_length = sge->length; | |
206 | isge->m = 0; | |
207 | isge->n = 0; | |
208 | goto ok; | |
209 | } | |
210 | mr = rcu_dereference( | |
211 | rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]); | |
212 | if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) | |
213 | goto bail; | |
214 | ||
215 | off = sge->addr - mr->user_base; | |
216 | if (unlikely(sge->addr < mr->user_base || | |
217 | off + sge->length > mr->length || | |
218 | (mr->access_flags & acc) != acc)) | |
219 | goto bail; | |
220 | atomic_inc(&mr->refcount); | |
221 | rcu_read_unlock(); | |
222 | ||
223 | off += mr->offset; | |
224 | if (mr->page_shift) { | |
225 | /* | |
226 | page sizes are uniform power of 2 so no loop is necessary | |
227 | entries_spanned_by_off is the number of times the loop below | |
228 | would have executed. | |
229 | */ | |
230 | size_t entries_spanned_by_off; | |
231 | ||
232 | entries_spanned_by_off = off >> mr->page_shift; | |
233 | off -= (entries_spanned_by_off << mr->page_shift); | |
234 | m = entries_spanned_by_off / HFI1_SEGSZ; | |
235 | n = entries_spanned_by_off % HFI1_SEGSZ; | |
236 | } else { | |
237 | m = 0; | |
238 | n = 0; | |
239 | while (off >= mr->map[m]->segs[n].length) { | |
240 | off -= mr->map[m]->segs[n].length; | |
241 | n++; | |
242 | if (n >= HFI1_SEGSZ) { | |
243 | m++; | |
244 | n = 0; | |
245 | } | |
246 | } | |
247 | } | |
248 | isge->mr = mr; | |
249 | isge->vaddr = mr->map[m]->segs[n].vaddr + off; | |
250 | isge->length = mr->map[m]->segs[n].length - off; | |
251 | isge->sge_length = sge->length; | |
252 | isge->m = m; | |
253 | isge->n = n; | |
254 | ok: | |
255 | return 1; | |
256 | bail: | |
257 | rcu_read_unlock(); | |
258 | return 0; | |
259 | } | |
260 | ||
261 | /** | |
262 | * hfi1_rkey_ok - check the IB virtual address, length, and RKEY | |
263 | * @qp: qp for validation | |
264 | * @sge: SGE state | |
265 | * @len: length of data | |
266 | * @vaddr: virtual address to place data | |
267 | * @rkey: rkey to check | |
268 | * @acc: access flags | |
269 | * | |
270 | * Return 1 if successful, otherwise 0. | |
271 | * | |
272 | * increments the reference count upon success | |
273 | */ | |
274 | int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge, | |
275 | u32 len, u64 vaddr, u32 rkey, int acc) | |
276 | { | |
277 | struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; | |
278 | struct hfi1_mregion *mr; | |
279 | unsigned n, m; | |
280 | size_t off; | |
281 | ||
282 | /* | |
283 | * We use RKEY == zero for kernel virtual addresses | |
284 | * (see hfi1_get_dma_mr and dma.c). | |
285 | */ | |
286 | rcu_read_lock(); | |
287 | if (rkey == 0) { | |
288 | struct hfi1_pd *pd = to_ipd(qp->ibqp.pd); | |
289 | struct hfi1_ibdev *dev = to_idev(pd->ibpd.device); | |
290 | ||
291 | if (pd->user) | |
292 | goto bail; | |
293 | mr = rcu_dereference(dev->dma_mr); | |
294 | if (!mr) | |
295 | goto bail; | |
296 | atomic_inc(&mr->refcount); | |
297 | rcu_read_unlock(); | |
298 | ||
299 | sge->mr = mr; | |
300 | sge->vaddr = (void *) vaddr; | |
301 | sge->length = len; | |
302 | sge->sge_length = len; | |
303 | sge->m = 0; | |
304 | sge->n = 0; | |
305 | goto ok; | |
306 | } | |
307 | ||
308 | mr = rcu_dereference( | |
309 | rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]); | |
310 | if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) | |
311 | goto bail; | |
312 | ||
313 | off = vaddr - mr->iova; | |
314 | if (unlikely(vaddr < mr->iova || off + len > mr->length || | |
315 | (mr->access_flags & acc) == 0)) | |
316 | goto bail; | |
317 | atomic_inc(&mr->refcount); | |
318 | rcu_read_unlock(); | |
319 | ||
320 | off += mr->offset; | |
321 | if (mr->page_shift) { | |
322 | /* | |
323 | page sizes are uniform power of 2 so no loop is necessary | |
324 | entries_spanned_by_off is the number of times the loop below | |
325 | would have executed. | |
326 | */ | |
327 | size_t entries_spanned_by_off; | |
328 | ||
329 | entries_spanned_by_off = off >> mr->page_shift; | |
330 | off -= (entries_spanned_by_off << mr->page_shift); | |
331 | m = entries_spanned_by_off / HFI1_SEGSZ; | |
332 | n = entries_spanned_by_off % HFI1_SEGSZ; | |
333 | } else { | |
334 | m = 0; | |
335 | n = 0; | |
336 | while (off >= mr->map[m]->segs[n].length) { | |
337 | off -= mr->map[m]->segs[n].length; | |
338 | n++; | |
339 | if (n >= HFI1_SEGSZ) { | |
340 | m++; | |
341 | n = 0; | |
342 | } | |
343 | } | |
344 | } | |
345 | sge->mr = mr; | |
346 | sge->vaddr = mr->map[m]->segs[n].vaddr + off; | |
347 | sge->length = mr->map[m]->segs[n].length - off; | |
348 | sge->sge_length = len; | |
349 | sge->m = m; | |
350 | sge->n = n; | |
351 | ok: | |
352 | return 1; | |
353 | bail: | |
354 | rcu_read_unlock(); | |
355 | return 0; | |
356 | } |