Commit | Line | Data |
---|---|---|
f727a0c3 | 1 | /* |
05d6ac1d | 2 | * Copyright(c) 2015, 2016 Intel Corporation. |
f727a0c3 MH |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
f727a0c3 MH |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
f727a0c3 MH |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | #include <asm/page.h> | |
48 | ||
49 | #include "user_exp_rcv.h" | |
50 | #include "trace.h" | |
06e0ffa6 | 51 | #include "mmu_rb.h" |
f727a0c3 | 52 | |
b8abe346 MH |
53 | struct tid_group { |
54 | struct list_head list; | |
55 | unsigned base; | |
56 | u8 size; | |
57 | u8 used; | |
58 | u8 map; | |
59 | }; | |
60 | ||
06e0ffa6 MH |
61 | struct tid_rb_node { |
62 | struct mmu_rb_node mmu; | |
f727a0c3 | 63 | unsigned long phys; |
f727a0c3 MH |
64 | struct tid_group *grp; |
65 | u32 rcventry; | |
66 | dma_addr_t dma_addr; | |
67 | bool freed; | |
68 | unsigned npages; | |
69 | struct page *pages[0]; | |
70 | }; | |
71 | ||
f88e0c8a MH |
72 | struct tid_pageset { |
73 | u16 idx; | |
74 | u16 count; | |
75 | }; | |
76 | ||
b8abe346 MH |
77 | #define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) |
78 | ||
3abb33ac MH |
79 | #define num_user_pages(vaddr, len) \ |
80 | (1 + (((((unsigned long)(vaddr) + \ | |
81 | (unsigned long)(len) - 1) & PAGE_MASK) - \ | |
82 | ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) | |
83 | ||
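Editor's illustration of the num_user_pages() macro above: a minimal user-space sketch of the same arithmetic, assuming 4 KiB pages (the sketch_ names and the page size are assumptions, not part of this file).

#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12
#define SKETCH_PAGE_MASK (~((1UL << SKETCH_PAGE_SHIFT) - 1))

/* Count every page touched by [vaddr, vaddr + len), including partial ones. */
static unsigned long sketch_num_user_pages(unsigned long vaddr,
					   unsigned long len)
{
	unsigned long first = vaddr & SKETCH_PAGE_MASK;
	unsigned long last = (vaddr + len - 1) & SKETCH_PAGE_MASK;

	return 1 + ((last - first) >> SKETCH_PAGE_SHIFT);
}

int main(void)
{
	/* 0x300 bytes starting 0x100 short of a page boundary span 2 pages. */
	printf("%lu\n", sketch_num_user_pages(0x1000f00UL, 0x300UL));
	return 0;
}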
f88e0c8a | 84 | static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, |
3abb33ac | 85 | struct rb_root *); |
7e7a436e | 86 | static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); |
f88e0c8a | 87 | static int set_rcvarray_entry(struct file *, unsigned long, u32, |
3abb33ac | 88 | struct tid_group *, struct page **, unsigned); |
06e0ffa6 | 89 | static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); |
f19bd643 MH |
90 | static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, |
91 | struct mm_struct *); | |
06e0ffa6 | 92 | static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); |
f88e0c8a MH |
93 | static int program_rcvarray(struct file *, unsigned long, struct tid_group *, |
94 | struct tid_pageset *, unsigned, u16, struct page **, | |
7e7a436e | 95 | u32 *, unsigned *, unsigned *); |
455d7f1a | 96 | static int unprogram_rcvarray(struct file *, u32, struct tid_group **); |
06e0ffa6 MH |
97 | static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); |
98 | ||
99 | static struct mmu_rb_ops tid_rb_ops = { | |
06e0ffa6 MH |
100 | .insert = mmu_rb_insert, |
101 | .remove = mmu_rb_remove, | |
102 | .invalidate = mmu_rb_invalidate | |
103 | }; | |
f88e0c8a MH |
104 | |
105 | static inline u32 rcventry2tidinfo(u32 rcventry) | |
106 | { | |
107 | u32 pair = rcventry & ~0x1; | |
108 | ||
109 | return EXP_TID_SET(IDX, pair >> 1) | | |
110 | EXP_TID_SET(CTRL, 1 << (rcventry - pair)); | |
111 | } | |
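rcventry2tidinfo() above encodes RcvArray entries pairwise: IDX names the entry pair (rcventry >> 1) and CTRL carries a one-hot bit (1 for the even entry, 2 for the odd one). An editor's sketch of the encode/decode round trip, using plain struct fields instead of the driver's EXP_TID_SET()/EXP_TID_GET() bit-field macros (the layout here is assumed for illustration only):

#include <assert.h>

struct sketch_tidinfo {
	unsigned idx;	/* pair index: rcventry >> 1 */
	unsigned ctrl;	/* entry within the pair: 1 (even) or 2 (odd) */
};

static struct sketch_tidinfo sketch_encode(unsigned rcventry)
{
	struct sketch_tidinfo t = {
		.idx = rcventry >> 1,
		.ctrl = 1u << (rcventry & 1),
	};
	return t;
}

/* The inverse used by unprogram_rcvarray(): entry = 2 * idx + (ctrl - 1). */
static unsigned sketch_decode(struct sketch_tidinfo t)
{
	return (t.idx << 1) + (t.ctrl - 1);
}

int main(void)
{
	unsigned e;

	for (e = 0; e < 16; e++)
		assert(sketch_decode(sketch_encode(e)) == e);
	return 0;
}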
f727a0c3 | 112 | |
b8abe346 MH |
113 | static inline void exp_tid_group_init(struct exp_tid_set *set) |
114 | { | |
115 | INIT_LIST_HEAD(&set->list); | |
116 | set->count = 0; | |
117 | } | |
118 | ||
119 | static inline void tid_group_remove(struct tid_group *grp, | |
120 | struct exp_tid_set *set) | |
121 | { | |
122 | list_del_init(&grp->list); | |
123 | set->count--; | |
124 | } | |
125 | ||
126 | static inline void tid_group_add_tail(struct tid_group *grp, | |
127 | struct exp_tid_set *set) | |
128 | { | |
129 | list_add_tail(&grp->list, &set->list); | |
130 | set->count++; | |
131 | } | |
132 | ||
133 | static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) | |
134 | { | |
135 | struct tid_group *grp = | |
136 | list_first_entry(&set->list, struct tid_group, list); | |
137 | list_del_init(&grp->list); | |
138 | set->count--; | |
139 | return grp; | |
140 | } | |
141 | ||
142 | static inline void tid_group_move(struct tid_group *group, | |
143 | struct exp_tid_set *s1, | |
144 | struct exp_tid_set *s2) | |
145 | { | |
146 | tid_group_remove(group, s1); | |
147 | tid_group_add_tail(group, s2); | |
148 | } | |
149 | ||
f727a0c3 MH |
150 | /* |
151 | * Initialize context and file private data needed for Expected | |
152 | * receive caching. This needs to be done after the context has | |
153 | * been configured with the eager/expected RcvEntry counts. | |
154 | */ | |
155 | int hfi1_user_exp_rcv_init(struct file *fp) | |
156 | { | |
3abb33ac MH |
157 | struct hfi1_filedata *fd = fp->private_data; |
158 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
159 | struct hfi1_devdata *dd = uctxt->dd; | |
160 | unsigned tidbase; | |
161 | int i, ret = 0; | |
162 | ||
3abb33ac MH |
163 | spin_lock_init(&fd->tid_lock); |
164 | spin_lock_init(&fd->invalid_lock); | |
3abb33ac MH |
165 | fd->tid_rb_root = RB_ROOT; |
166 | ||
167 | if (!uctxt->subctxt_cnt || !fd->subctxt) { | |
168 | exp_tid_group_init(&uctxt->tid_group_list); | |
169 | exp_tid_group_init(&uctxt->tid_used_list); | |
170 | exp_tid_group_init(&uctxt->tid_full_list); | |
171 | ||
172 | tidbase = uctxt->expected_base; | |
173 | for (i = 0; i < uctxt->expected_count / | |
174 | dd->rcv_entries.group_size; i++) { | |
175 | struct tid_group *grp; | |
176 | ||
177 | grp = kzalloc(sizeof(*grp), GFP_KERNEL); | |
178 | if (!grp) { | |
179 | /* | |
180 | * If we fail here, the groups already | |
181 | * allocated will be freed by the close | |
182 | * call. | |
183 | */ | |
184 | ret = -ENOMEM; | |
185 | goto done; | |
186 | } | |
187 | grp->size = dd->rcv_entries.group_size; | |
188 | grp->base = tidbase; | |
189 | tid_group_add_tail(grp, &uctxt->tid_group_list); | |
190 | tidbase += dd->rcv_entries.group_size; | |
191 | } | |
192 | } | |
193 | ||
a92ba6d6 MH |
194 | fd->entry_to_rb = kcalloc(uctxt->expected_count, |
195 | sizeof(struct rb_node *), | |
196 | GFP_KERNEL); | |
197 | if (!fd->entry_to_rb) | |
198 | return -ENOMEM; | |
199 | ||
3abb33ac MH |
200 | if (!HFI1_CAP_IS_USET(TID_UNMAP)) { |
201 | fd->invalid_tid_idx = 0; | |
202 | fd->invalid_tids = kzalloc(uctxt->expected_count * | |
203 | sizeof(u32), GFP_KERNEL); | |
204 | if (!fd->invalid_tids) { | |
205 | ret = -ENOMEM; | |
206 | goto done; | |
a92ba6d6 MH |
207 | } |
208 | ||
209 | /* | |
210 | * Register MMU notifier callbacks. If the registration | |
211 | * fails, continue but turn off the TID caching for | |
212 | * all user contexts. | |
213 | */ | |
06e0ffa6 | 214 | ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops); |
a92ba6d6 MH |
215 | if (ret) { |
216 | dd_dev_info(dd, | |
217 | "Failed MMU notifier registration %d\n", | |
218 | ret); | |
219 | HFI1_CAP_USET(TID_UNMAP); | |
220 | ret = 0; | |
3abb33ac MH |
221 | } |
222 | } | |
223 | ||
3abb33ac MH |
224 | /* |
225 | * PSM does not have a good way to separate, count, and | |
226 | * effectively enforce a limit on RcvArray entries used by | |
227 | * subctxts (when context sharing is used) when TID caching | |
228 | * is enabled. To help with that, we calculate a per-process | |
229 | * RcvArray entry share and enforce that. | |
230 | * If TID caching is not in use, PSM deals with usage on its | |
231 | * own. In that case, we allow any subctxt to take all of the | |
232 | * entries. | |
233 | * | |
234 | * Make sure that we set the tid counts only after successful | |
235 | * init. | |
236 | */ | |
455d7f1a | 237 | spin_lock(&fd->tid_lock); |
3abb33ac MH |
238 | if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { |
239 | u16 remainder; | |
240 | ||
241 | fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; | |
242 | remainder = uctxt->expected_count % uctxt->subctxt_cnt; | |
243 | if (remainder && fd->subctxt < remainder) | |
244 | fd->tid_limit++; | |
245 | } else { | |
246 | fd->tid_limit = uctxt->expected_count; | |
247 | } | |
455d7f1a | 248 | spin_unlock(&fd->tid_lock); |
3abb33ac MH |
249 | done: |
250 | return ret; | |
f727a0c3 MH |
251 | } |
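The tid_limit computation above hands the division remainder to the lowest-numbered subcontexts. Worked example (editor's sketch; the counts are invented): expected_count = 2048 and subctxt_cnt = 3 give 2048 / 3 = 682 remainder 2, so subcontexts 0 and 1 get 683 entries each and subcontext 2 gets 682.

#include <assert.h>

static unsigned sketch_tid_limit(unsigned expected_count,
				 unsigned subctxt_cnt, unsigned subctxt)
{
	unsigned limit = expected_count / subctxt_cnt;

	/* Low-numbered subcontexts absorb the remainder, as in the code above. */
	if (subctxt < expected_count % subctxt_cnt)
		limit++;
	return limit;
}

int main(void)
{
	assert(sketch_tid_limit(2048, 3, 0) == 683);
	assert(sketch_tid_limit(2048, 3, 2) == 682);
	return 0;
}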
252 | ||
253 | int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) | |
254 | { | |
3abb33ac MH |
255 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
256 | struct tid_group *grp, *gptr; | |
257 | ||
94158442 MH |
258 | if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) |
259 | return 0; | |
3abb33ac MH |
260 | /* |
261 | * The notifier would have been removed when the process's mm
262 | * was freed. | |
263 | */ | |
06e0ffa6 MH |
264 | if (!HFI1_CAP_IS_USET(TID_UNMAP)) |
265 | hfi1_mmu_rb_unregister(&fd->tid_rb_root); | |
3abb33ac MH |
266 | |
267 | kfree(fd->invalid_tids); | |
268 | ||
269 | if (!uctxt->cnt) { | |
270 | if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) | |
271 | unlock_exp_tids(uctxt, &uctxt->tid_full_list, | |
272 | &fd->tid_rb_root); | |
273 | if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) | |
274 | unlock_exp_tids(uctxt, &uctxt->tid_used_list, | |
275 | &fd->tid_rb_root); | |
276 | list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, | |
277 | list) { | |
278 | list_del_init(&grp->list); | |
279 | kfree(grp); | |
280 | } | |
3abb33ac MH |
281 | hfi1_clear_tids(uctxt); |
282 | } | |
a92ba6d6 MH |
283 | |
284 | kfree(fd->entry_to_rb); | |
3abb33ac | 285 | return 0; |
f727a0c3 MH |
286 | } |
287 | ||
b8abe346 MH |
288 | /* |
289 | * Write an "empty" RcvArray entry. | |
290 | * This function exists so the TID registration code can use it
291 | * to write to unused/unneeded entries and still take advantage | |
292 | * of the WC performance improvements. The HFI will ignore this | |
293 | * write to the RcvArray entry. | |
294 | */ | |
295 | static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) | |
296 | { | |
297 | /* | |
298 | * Doing the WC fill writes only makes sense if the device is | |
299 | * present and the RcvArray has been mapped as WC memory. | |
300 | */ | |
301 | if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) | |
302 | writeq(0, dd->rcvarray_wc + (index * 8)); | |
303 | } | |
304 | ||
7e7a436e MH |
305 | /* |
306 | * RcvArray entry allocation for Expected Receives is done by the | |
307 | * following algorithm: | |
308 | * | |
309 | * The context keeps 3 lists of groups of RcvArray entries: | |
310 | * 1. List of empty groups - tid_group_list | |
311 | * This list is created during user context creation and | |
312 | * contains elements which describe sets (of 8) of empty | |
313 | * RcvArray entries. | |
314 | * 2. List of partially used groups - tid_used_list | |
315 | * This list contains sets of RcvArray entries which are | |
316 | * not completely used up. Another mapping request could | |
317 | * use some or all of the remaining entries.
318 | * 3. List of full groups - tid_full_list | |
319 | * This is the list where sets that are completely used | |
320 | * up go. | |
321 | * | |
322 | * An attempt to optimize the usage of RcvArray entries is | |
323 | * made by finding all sets of physically contiguous pages in a | |
324 | * user's buffer. | |
325 | * These physically contiguous sets are further split into | |
326 | * sizes supported by the receive engine of the HFI. The | |
327 | * resulting sets of pages are stored in struct tid_pageset, | |
328 | * which describes the sets as: | |
329 | * * .count - number of pages in this set | |
330 | * * .idx - starting index into struct page ** array | |
331 | * of this set | |
332 | * | |
333 | * From this point on, the algorithm deals with the page sets | |
334 | * described above. The number of pagesets is divided by the | |
335 | * RcvArray group size to produce the number of full groups | |
336 | * needed. | |
337 | * | |
338 | * Groups from the 3 lists are manipulated using the following | |
339 | * rules: | |
340 | * 1. For each set of 8 pagesets, a complete group from | |
341 | * tid_group_list is taken, programmed, and moved to | |
342 | * the tid_full_list list. | |
343 | * 2. For all remaining pagesets: | |
344 | * 2.1 If the tid_used_list is empty and the tid_group_list | |
345 | * is empty, stop processing pagesets and return only
346 | * what has been programmed up to this point. | |
347 | * 2.2 If the tid_used_list is empty and the tid_group_list | |
348 | * is not empty, move a group from tid_group_list to | |
349 | * tid_used_list. | |
350 | * 2.3 For each group in tid_used_list, program as much as
351 | * can fit into the group. If the group becomes fully | |
352 | * used, move it to tid_full_list. | |
353 | */ | |
f727a0c3 MH |
354 | int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) |
355 | { | |
7e7a436e MH |
356 | int ret = 0, need_group = 0, pinned; |
357 | struct hfi1_filedata *fd = fp->private_data; | |
358 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
359 | struct hfi1_devdata *dd = uctxt->dd; | |
360 | unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, | |
361 | tididx = 0, mapped, mapped_pages = 0; | |
362 | unsigned long vaddr = tinfo->vaddr; | |
363 | struct page **pages = NULL; | |
364 | u32 *tidlist = NULL; | |
365 | struct tid_pageset *pagesets = NULL; | |
366 | ||
367 | /* Get the number of pages the user buffer spans */ | |
368 | npages = num_user_pages(vaddr, tinfo->length); | |
369 | if (!npages) | |
370 | return -EINVAL; | |
371 | ||
372 | if (npages > uctxt->expected_count) { | |
373 | dd_dev_err(dd, "Expected buffer too big\n"); | |
374 | return -EINVAL; | |
375 | } | |
376 | ||
377 | /* Verify that access is OK for the user buffer */ | |
378 | if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, | |
379 | npages * PAGE_SIZE)) { | |
380 | dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", | |
381 | (void *)vaddr, npages); | |
382 | return -EFAULT; | |
383 | } | |
384 | ||
385 | pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets), | |
386 | GFP_KERNEL); | |
387 | if (!pagesets) | |
388 | return -ENOMEM; | |
389 | ||
390 | /* Allocate the array of struct page pointers needed for pinning */ | |
391 | pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); | |
392 | if (!pages) { | |
393 | ret = -ENOMEM; | |
394 | goto bail; | |
395 | } | |
396 | ||
397 | /* | |
398 | * Pin all the pages of the user buffer. If we can't pin all the | |
399 | * pages, accept the amount pinned so far and program only that. | |
400 | * User space knows how to deal with partially programmed buffers. | |
401 | */ | |
a7922f7d MH |
402 | if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
403 | ret = -ENOMEM;
| goto bail;
| }
7e7a436e MH |
404 | pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); |
405 | if (pinned <= 0) { | |
406 | ret = pinned; | |
407 | goto bail; | |
408 | } | |
a7922f7d | 409 | fd->tid_n_pinned += pinned;
7e7a436e MH |
410 | |
411 | /* Find sets of physically contiguous pages */ | |
412 | npagesets = find_phys_blocks(pages, pinned, pagesets); | |
413 | ||
414 | /* | |
415 | * We don't need to access this under a lock since tid_used is per | |
416 | * process and the same process cannot be in hfi1_user_exp_rcv_clear() | |
417 | * and hfi1_user_exp_rcv_setup() at the same time. | |
418 | */ | |
419 | spin_lock(&fd->tid_lock); | |
420 | if (fd->tid_used + npagesets > fd->tid_limit) | |
421 | pageset_count = fd->tid_limit - fd->tid_used; | |
422 | else | |
423 | pageset_count = npagesets; | |
424 | spin_unlock(&fd->tid_lock); | |
425 | ||
426 | if (!pageset_count) | |
427 | goto bail; | |
428 | ||
429 | ngroups = pageset_count / dd->rcv_entries.group_size; | |
430 | tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); | |
431 | if (!tidlist) { | |
432 | ret = -ENOMEM; | |
433 | goto nomem; | |
434 | } | |
435 | ||
436 | tididx = 0; | |
437 | ||
438 | /* | |
439 | * From this point on, we are going to be using shared (between master | |
440 | * and subcontexts) context resources. We need to take the lock. | |
441 | */ | |
442 | mutex_lock(&uctxt->exp_lock); | |
443 | /* | |
444 | * The first step is to program the RcvArray entries which are complete | |
445 | * groups. | |
446 | */ | |
447 | while (ngroups && uctxt->tid_group_list.count) { | |
448 | struct tid_group *grp = | |
449 | tid_group_pop(&uctxt->tid_group_list); | |
450 | ||
451 | ret = program_rcvarray(fp, vaddr, grp, pagesets, | |
452 | pageidx, dd->rcv_entries.group_size, | |
453 | pages, tidlist, &tididx, &mapped); | |
454 | /* | |
455 | * If there was a failure to program the RcvArray | |
456 | * entries for the entire group, reset the grp fields | |
457 | * and add the grp back to the free group list. | |
458 | */ | |
459 | if (ret <= 0) { | |
460 | tid_group_add_tail(grp, &uctxt->tid_group_list); | |
461 | hfi1_cdbg(TID, | |
462 | "Failed to program RcvArray group %d", ret); | |
463 | goto unlock; | |
464 | } | |
465 | ||
466 | tid_group_add_tail(grp, &uctxt->tid_full_list); | |
467 | ngroups--; | |
468 | pageidx += ret; | |
469 | mapped_pages += mapped; | |
470 | } | |
471 | ||
472 | while (pageidx < pageset_count) { | |
473 | struct tid_group *grp, *ptr; | |
474 | /* | |
475 | * If we don't have any partially used tid groups, check | |
476 | * if we have empty groups. If so, take one from there and | |
477 | * put in the partially used list. | |
478 | */ | |
479 | if (!uctxt->tid_used_list.count || need_group) { | |
480 | if (!uctxt->tid_group_list.count) | |
481 | goto unlock; | |
482 | ||
483 | grp = tid_group_pop(&uctxt->tid_group_list); | |
484 | tid_group_add_tail(grp, &uctxt->tid_used_list); | |
485 | need_group = 0; | |
486 | } | |
487 | /* | |
488 | * There is an optimization opportunity here - instead of | |
489 | * fitting as many page sets as we can, check for a group | |
490 | * later on in the list that could fit all of them. | |
491 | */ | |
492 | list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list, | |
493 | list) { | |
494 | unsigned use = min_t(unsigned, pageset_count - pageidx, | |
495 | grp->size - grp->used); | |
496 | ||
497 | ret = program_rcvarray(fp, vaddr, grp, pagesets, | |
498 | pageidx, use, pages, tidlist, | |
499 | &tididx, &mapped); | |
500 | if (ret < 0) { | |
501 | hfi1_cdbg(TID, | |
502 | "Failed to program RcvArray entries %d", | |
503 | ret); | |
504 | ret = -EFAULT; | |
505 | goto unlock; | |
506 | } else if (ret > 0) { | |
507 | if (grp->used == grp->size) | |
508 | tid_group_move(grp, | |
509 | &uctxt->tid_used_list, | |
510 | &uctxt->tid_full_list); | |
511 | pageidx += ret; | |
512 | mapped_pages += mapped; | |
513 | need_group = 0; | |
514 | /* Check if we are done so we break out early */ | |
515 | if (pageidx >= pageset_count) | |
516 | break; | |
517 | } else if (WARN_ON(ret == 0)) { | |
518 | /* | |
519 | * If ret is 0, we did not program any entries | |
520 | * into this group, which can only happen if | |
521 | * we've screwed up the accounting somewhere. | |
522 | * Warn and try to continue. | |
523 | */ | |
524 | need_group = 1; | |
525 | } | |
526 | } | |
527 | } | |
528 | unlock: | |
529 | mutex_unlock(&uctxt->exp_lock); | |
530 | nomem: | |
531 | hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, | |
532 | mapped_pages, ret); | |
533 | if (tididx) { | |
534 | spin_lock(&fd->tid_lock); | |
535 | fd->tid_used += tididx; | |
536 | spin_unlock(&fd->tid_lock); | |
537 | tinfo->tidcnt = tididx; | |
538 | tinfo->length = mapped_pages * PAGE_SIZE; | |
539 | ||
540 | if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, | |
541 | tidlist, sizeof(tidlist[0]) * tididx)) { | |
542 | /* | |
543 | * On failure to copy to the user level, we need to undo | |
544 | * everything done so far so we don't leak resources. | |
545 | */ | |
546 | tinfo->tidlist = (unsigned long)tidlist;
547 | hfi1_user_exp_rcv_clear(fp, tinfo); | |
548 | tinfo->tidlist = 0; | |
549 | ret = -EFAULT; | |
550 | goto bail; | |
551 | } | |
552 | } | |
553 | ||
554 | /* | |
555 | * If not everything was mapped (due to insufficient RcvArray entries, | |
556 | * for example), unpin all unmapped pages so we can pin them next time.
557 | */ | |
a7922f7d | 558 | if (mapped_pages != pinned) { |
bd3a8947 | 559 | hfi1_release_user_pages(current->mm, &pages[mapped_pages], |
7e7a436e MH |
560 | pinned - mapped_pages, |
561 | false); | |
a7922f7d MH |
562 | fd->tid_n_pinned -= pinned - mapped_pages; |
563 | } | |
7e7a436e MH |
564 | bail: |
565 | kfree(pagesets); | |
566 | kfree(pages); | |
567 | kfree(tidlist); | |
568 | return ret > 0 ? 0 : ret; | |
f727a0c3 MH |
569 | } |
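A concrete pass through the grouping arithmetic described before hfi1_user_exp_rcv_setup() (editor's sketch; the numbers are invented, the group size of 8 matches the "sets of 8" in the comment above): 21 pagesets yield 21 / 8 = 2 complete groups, which are popped from tid_group_list, programmed, and moved to tid_full_list; the remaining 5 pagesets are packed into groups drawn from tid_used_list, refilled from tid_group_list as needed.

#include <stdio.h>

int main(void)
{
	/* Mirror the full-group/remainder split performed above. */
	unsigned pageset_count = 21, group_size = 8;
	unsigned ngroups = pageset_count / group_size;
	unsigned leftover = pageset_count % group_size;

	/* Prints "full groups: 2, pagesets for partial groups: 5". */
	printf("full groups: %u, pagesets for partial groups: %u\n",
	       ngroups, leftover);
	return 0;
}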
570 | ||
571 | int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) | |
572 | { | |
455d7f1a MH |
573 | int ret = 0; |
574 | struct hfi1_filedata *fd = fp->private_data; | |
575 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
576 | u32 *tidinfo; | |
577 | unsigned tididx; | |
578 | ||
579 | tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL); | |
580 | if (!tidinfo) | |
581 | return -ENOMEM; | |
582 | ||
583 | if (copy_from_user(tidinfo, (void __user *)(unsigned long) | |
584 | tinfo->tidlist, sizeof(tidinfo[0]) * | |
585 | tinfo->tidcnt)) { | |
586 | ret = -EFAULT; | |
587 | goto done; | |
588 | } | |
589 | ||
590 | mutex_lock(&uctxt->exp_lock); | |
591 | for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { | |
592 | ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL); | |
593 | if (ret) { | |
594 | hfi1_cdbg(TID, "Failed to unprogram rcv array %d", | |
595 | ret); | |
596 | break; | |
597 | } | |
598 | } | |
599 | spin_lock(&fd->tid_lock); | |
600 | fd->tid_used -= tididx; | |
601 | spin_unlock(&fd->tid_lock); | |
602 | tinfo->tidcnt = tididx; | |
603 | mutex_unlock(&uctxt->exp_lock); | |
604 | done: | |
605 | kfree(tidinfo); | |
606 | return ret; | |
f727a0c3 MH |
607 | } |
608 | ||
609 | int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) | |
610 | { | |
455d7f1a MH |
611 | struct hfi1_filedata *fd = fp->private_data; |
612 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
613 | unsigned long *ev = uctxt->dd->events + | |
614 | (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * | |
615 | HFI1_MAX_SHARED_CTXTS) + fd->subctxt); | |
616 | u32 *array; | |
617 | int ret = 0; | |
618 | ||
619 | if (!fd->invalid_tids) | |
620 | return -EINVAL; | |
621 | ||
622 | /* | |
623 | * copy_to_user() can sleep, which will leave the invalid_lock | |
624 | * locked and cause the MMU notifier to be blocked on the lock | |
625 | * for a long time. | |
626 | * Copy the data to a local buffer so we can release the lock. | |
627 | */ | |
628 | array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL); | |
629 | if (!array) | |
630 | return -ENOMEM;
631 | ||
632 | spin_lock(&fd->invalid_lock); | |
633 | if (fd->invalid_tid_idx) { | |
634 | memcpy(array, fd->invalid_tids, sizeof(*array) * | |
635 | fd->invalid_tid_idx); | |
636 | memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) * | |
637 | fd->invalid_tid_idx); | |
638 | tinfo->tidcnt = fd->invalid_tid_idx; | |
639 | fd->invalid_tid_idx = 0; | |
640 | /* | |
641 | * Reset the user flag while still holding the lock. | |
642 | * Otherwise, PSM can miss events. | |
643 | */ | |
644 | clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); | |
645 | } else { | |
646 | tinfo->tidcnt = 0; | |
647 | } | |
648 | spin_unlock(&fd->invalid_lock); | |
649 | ||
650 | if (tinfo->tidcnt) { | |
651 | if (copy_to_user((void __user *)tinfo->tidlist, | |
652 | array, sizeof(*array) * tinfo->tidcnt)) | |
653 | ret = -EFAULT; | |
654 | } | |
655 | kfree(array); | |
656 | ||
657 | return ret; | |
f727a0c3 MH |
658 | } |
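hfi1_user_exp_rcv_invalid() above uses a classic pattern: snapshot the shared state under the spinlock into a local buffer, drop the lock, and only then perform the sleeping copy_to_user(). An editor's miniature of that pattern (a pthread mutex stands in for the spinlock and a plain memcpy for copy_to_user; all names are invented):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t sketch_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned sketch_shared[4] = { 1, 2, 3, 4 };

static void sketch_snapshot_then_copy(unsigned *user_buf)
{
	unsigned local[4];

	pthread_mutex_lock(&sketch_lock);
	memcpy(local, sketch_shared, sizeof(local));
	/* Reset the shared state, like clearing invalid_tids above. */
	memset(sketch_shared, 0, sizeof(sketch_shared));
	pthread_mutex_unlock(&sketch_lock);

	/* The slow copy happens with the lock already released. */
	memcpy(user_buf, local, sizeof(local));
}

int main(void)
{
	unsigned out[4];

	sketch_snapshot_then_copy(out);
	printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);
	return 0;
}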
659 | ||
f88e0c8a MH |
660 | static u32 find_phys_blocks(struct page **pages, unsigned npages, |
661 | struct tid_pageset *list) | |
662 | { | |
663 | unsigned pagecount, pageidx, setcount = 0, i; | |
664 | unsigned long pfn, this_pfn; | |
665 | ||
666 | if (!npages) | |
667 | return 0; | |
668 | ||
669 | /* | |
670 | * Look for sets of physically contiguous pages in the user buffer. | |
671 | * This will allow us to optimize Expected RcvArray entry usage by | |
672 | * using the bigger supported sizes. | |
673 | */ | |
674 | pfn = page_to_pfn(pages[0]); | |
675 | for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) { | |
676 | this_pfn = i < npages ? page_to_pfn(pages[i]) : 0; | |
677 | ||
678 | /* | |
679 | * If the PFNs are not sequential, pages are not physically
680 | * contiguous. | |
681 | */ | |
682 | if (this_pfn != ++pfn) { | |
683 | /* | |
684 | * At this point we have to loop over the set of | |
685 | * physically contiguous pages and break them down into
686 | * sizes supported by the HW. | |
687 | * There are two main constraints: | |
688 | * 1. The max buffer size is MAX_EXPECTED_BUFFER. | |
689 | * If the total set size is bigger than that,
690 | * program only a MAX_EXPECTED_BUFFER chunk. | |
691 | * 2. The buffer size has to be a power of two. If | |
692 | * it is not, round down to the closest power of
693 | * 2 and program that size. | |
694 | */ | |
695 | while (pagecount) { | |
696 | int maxpages = pagecount; | |
697 | u32 bufsize = pagecount * PAGE_SIZE; | |
698 | ||
699 | if (bufsize > MAX_EXPECTED_BUFFER) | |
700 | maxpages = | |
701 | MAX_EXPECTED_BUFFER >> | |
702 | PAGE_SHIFT; | |
703 | else if (!is_power_of_2(bufsize)) | |
704 | maxpages = | |
705 | rounddown_pow_of_two(bufsize) >> | |
706 | PAGE_SHIFT; | |
707 | ||
708 | list[setcount].idx = pageidx; | |
709 | list[setcount].count = maxpages; | |
710 | pagecount -= maxpages; | |
711 | pageidx += maxpages; | |
712 | setcount++; | |
713 | } | |
714 | pageidx = i; | |
715 | pagecount = 1; | |
716 | pfn = this_pfn; | |
717 | } else { | |
718 | pagecount++; | |
719 | } | |
720 | } | |
721 | return setcount; | |
722 | } | |
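The inner while loop above chops each physically contiguous run into hardware-programmable chunks: capped at MAX_EXPECTED_BUFFER and otherwise rounded down to a power of two. An editor's sketch of that splitting policy (the 4 KiB page size and the 1 MiB cap are demo assumptions, not values from this file):

#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096u
#define SKETCH_MAX_BUFFER (1024u * 1024u)	/* assumed cap for the demo */

/* Tiny stand-in for the kernel's rounddown_pow_of_two(). */
static unsigned sketch_rounddown_pow2(unsigned v)
{
	unsigned p = 1;

	while (p * 2 <= v)
		p *= 2;
	return p;
}

int main(void)
{
	unsigned pagecount = 11;	/* one contiguous run of 11 pages */

	while (pagecount) {
		unsigned maxpages = pagecount;
		unsigned bufsize = pagecount * SKETCH_PAGE_SIZE;

		if (bufsize > SKETCH_MAX_BUFFER)
			maxpages = SKETCH_MAX_BUFFER / SKETCH_PAGE_SIZE;
		else if (bufsize & (bufsize - 1))	/* not a power of 2 */
			maxpages = sketch_rounddown_pow2(bufsize) /
				   SKETCH_PAGE_SIZE;

		printf("chunk of %u pages\n", maxpages);	/* 8, 2, 1 */
		pagecount -= maxpages;
	}
	return 0;
}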
723 | ||
724 | /** | |
725 | * program_rcvarray() - program an RcvArray group with receive buffers | |
726 | * @fp: file pointer | |
727 | * @vaddr: starting user virtual address | |
728 | * @grp: RcvArray group | |
729 | * @sets: array of struct tid_pageset holding information on physically | |
730 | * contiguous chunks from the user buffer | |
731 | * @start: starting index into sets array | |
732 | * @count: number of struct tid_pageset's to program | |
733 | * @pages: an array of struct page * for the user buffer | |
734 | * @tidlist: the array of u32 elements where the information about the
735 | * programmed RcvArray entries is to be encoded. | |
736 | * @tididx: starting offset into tidlist | |
737 | * @pmapped: (output parameter) number of pages programmed into the RcvArray | |
738 | * entries. | |
739 | * | |
740 | * This function will program up to 'count' number of RcvArray entries from the | |
741 | * group 'grp'. To make best use of write-combining writes, the function will | |
742 | * perform writes to the unused RcvArray entries which will be ignored by the | |
743 | * HW. Each RcvArray entry will be programmed with a physically contiguous | |
744 | * buffer chunk from the user's virtual buffer. | |
745 | * | |
746 | * Return: | |
747 | * -EINVAL if the requested count is larger than the size of the group, | |
748 | * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or | |
749 | * number of RcvArray entries programmed. | |
750 | */ | |
751 | static int program_rcvarray(struct file *fp, unsigned long vaddr, | |
752 | struct tid_group *grp, | |
753 | struct tid_pageset *sets, | |
754 | unsigned start, u16 count, struct page **pages, | |
755 | u32 *tidlist, unsigned *tididx, unsigned *pmapped) | |
756 | { | |
757 | struct hfi1_filedata *fd = fp->private_data; | |
758 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
759 | struct hfi1_devdata *dd = uctxt->dd; | |
760 | u16 idx; | |
761 | u32 tidinfo = 0, rcventry, useidx = 0; | |
762 | int mapped = 0; | |
763 | ||
764 | /* Count should never be larger than the group size */ | |
765 | if (count > grp->size) | |
766 | return -EINVAL; | |
767 | ||
768 | /* Find the first unused entry in the group */ | |
769 | for (idx = 0; idx < grp->size; idx++) { | |
770 | if (!(grp->map & (1 << idx))) { | |
771 | useidx = idx; | |
772 | break; | |
773 | } | |
774 | rcv_array_wc_fill(dd, grp->base + idx); | |
775 | } | |
776 | ||
777 | idx = 0; | |
778 | while (idx < count) { | |
779 | u16 npages, pageidx, setidx = start + idx; | |
780 | int ret = 0; | |
781 | ||
782 | /* | |
783 | * If this entry in the group is used, move to the next one. | |
784 | * If we go past the end of the group, exit the loop. | |
785 | */ | |
786 | if (useidx >= grp->size) { | |
787 | break; | |
788 | } else if (grp->map & (1 << useidx)) { | |
789 | rcv_array_wc_fill(dd, grp->base + useidx); | |
790 | useidx++; | |
791 | continue; | |
792 | } | |
793 | ||
794 | rcventry = grp->base + useidx; | |
795 | npages = sets[setidx].count; | |
796 | pageidx = sets[setidx].idx; | |
797 | ||
798 | ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE), | |
799 | rcventry, grp, pages + pageidx, | |
800 | npages); | |
801 | if (ret) | |
802 | return ret; | |
803 | mapped += npages; | |
804 | ||
805 | tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) | | |
806 | EXP_TID_SET(LEN, npages); | |
807 | tidlist[(*tididx)++] = tidinfo; | |
808 | grp->used++; | |
809 | grp->map |= 1 << useidx++; | |
810 | idx++; | |
811 | } | |
812 | ||
813 | /* Fill the rest of the group with "blank" writes */ | |
814 | for (; useidx < grp->size; useidx++) | |
815 | rcv_array_wc_fill(dd, grp->base + useidx); | |
816 | *pmapped = mapped; | |
817 | return idx; | |
818 | } | |
819 | ||
820 | static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, | |
821 | u32 rcventry, struct tid_group *grp, | |
822 | struct page **pages, unsigned npages) | |
823 | { | |
824 | int ret; | |
825 | struct hfi1_filedata *fd = fp->private_data; | |
826 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
06e0ffa6 | 827 | struct tid_rb_node *node; |
f88e0c8a MH |
828 | struct hfi1_devdata *dd = uctxt->dd; |
829 | struct rb_root *root = &fd->tid_rb_root; | |
830 | dma_addr_t phys; | |
831 | ||
832 | /* | |
833 | * Allocate the node first so we can handle a potential | |
834 | * failure before we've programmed anything. | |
835 | */ | |
836 | node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), | |
837 | GFP_KERNEL); | |
838 | if (!node) | |
839 | return -ENOMEM; | |
840 | ||
841 | phys = pci_map_single(dd->pcidev, | |
842 | __va(page_to_phys(pages[0])), | |
843 | npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); | |
844 | if (dma_mapping_error(&dd->pcidev->dev, phys)) { | |
845 | dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n", | |
846 | phys); | |
847 | kfree(node); | |
848 | return -EFAULT; | |
849 | } | |
850 | ||
06e0ffa6 MH |
851 | node->mmu.addr = vaddr; |
852 | node->mmu.len = npages * PAGE_SIZE; | |
f88e0c8a | 853 | node->phys = page_to_phys(pages[0]); |
f88e0c8a MH |
854 | node->npages = npages; |
855 | node->rcventry = rcventry; | |
856 | node->dma_addr = phys; | |
857 | node->grp = grp; | |
858 | node->freed = false; | |
859 | memcpy(node->pages, pages, sizeof(struct page *) * npages); | |
860 | ||
368f2b59 MH |
861 | if (HFI1_CAP_IS_USET(TID_UNMAP)) |
862 | ret = mmu_rb_insert(root, &node->mmu); | |
863 | else | |
864 | ret = hfi1_mmu_rb_insert(root, &node->mmu); | |
f88e0c8a MH |
865 | |
866 | if (ret) { | |
867 | hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", | |
06e0ffa6 | 868 | node->rcventry, node->mmu.addr, node->phys, ret); |
f88e0c8a MH |
869 | pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, |
870 | PCI_DMA_FROMDEVICE); | |
871 | kfree(node); | |
872 | return -EFAULT; | |
873 | } | |
874 | hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); | |
06e0ffa6 MH |
875 | trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, |
876 | node->mmu.addr, node->phys, phys); | |
f88e0c8a MH |
877 | return 0; |
878 | } | |
879 | ||
880 | static int unprogram_rcvarray(struct file *fp, u32 tidinfo, | |
881 | struct tid_group **grp) | |
882 | { | |
883 | struct hfi1_filedata *fd = fp->private_data; | |
884 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
885 | struct hfi1_devdata *dd = uctxt->dd; | |
06e0ffa6 | 886 | struct tid_rb_node *node; |
f88e0c8a | 887 | u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); |
a92ba6d6 | 888 | u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; |
f88e0c8a MH |
889 | |
890 | if (tididx >= uctxt->expected_count) { | |
891 | dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n", | |
892 | tididx, uctxt->ctxt); | |
893 | return -EINVAL; | |
894 | } | |
895 | ||
896 | if (tidctrl == 0x3) | |
897 | return -EINVAL; | |
898 | ||
a92ba6d6 | 899 | rcventry = tididx + (tidctrl - 1); |
f88e0c8a | 900 | |
a92ba6d6 | 901 | node = fd->entry_to_rb[rcventry]; |
06e0ffa6 | 902 | if (!node || node->rcventry != (uctxt->expected_base + rcventry)) |
f88e0c8a | 903 | return -EBADF; |
368f2b59 | 904 | if (HFI1_CAP_IS_USET(TID_UNMAP)) |
f19bd643 | 905 | mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); |
368f2b59 MH |
906 | else |
907 | hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); | |
06e0ffa6 | 908 | |
f88e0c8a MH |
909 | if (grp) |
910 | *grp = node->grp; | |
911 | clear_tid_node(fd, fd->subctxt, node); | |
912 | return 0; | |
913 | } | |
914 | ||
915 | static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, | |
06e0ffa6 | 916 | struct tid_rb_node *node) |
f88e0c8a MH |
917 | { |
918 | struct hfi1_ctxtdata *uctxt = fd->uctxt; | |
919 | struct hfi1_devdata *dd = uctxt->dd; | |
920 | ||
0b091fb3 | 921 | trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, |
06e0ffa6 | 922 | node->npages, node->mmu.addr, node->phys, |
0b091fb3 MH |
923 | node->dma_addr); |
924 | ||
f88e0c8a MH |
925 | hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); |
926 | /* | |
927 | * Make sure device has seen the write before we unpin the | |
928 | * pages. | |
929 | */ | |
930 | flush_wc(); | |
931 | ||
06e0ffa6 | 932 | pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, |
f88e0c8a | 933 | PCI_DMA_FROMDEVICE); |
bd3a8947 | 934 | hfi1_release_user_pages(current->mm, node->pages, node->npages, true); |
a7922f7d | 935 | fd->tid_n_pinned -= node->npages; |
f88e0c8a MH |
936 | |
937 | node->grp->used--; | |
938 | node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); | |
939 | ||
940 | if (node->grp->used == node->grp->size - 1) | |
941 | tid_group_move(node->grp, &uctxt->tid_full_list, | |
942 | &uctxt->tid_used_list); | |
943 | else if (!node->grp->used) | |
944 | tid_group_move(node->grp, &uctxt->tid_used_list, | |
945 | &uctxt->tid_group_list); | |
946 | kfree(node); | |
947 | } | |
948 | ||
949 | static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, | |
950 | struct exp_tid_set *set, struct rb_root *root) | |
951 | { | |
952 | struct tid_group *grp, *ptr; | |
953 | struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata, | |
954 | tid_rb_root); | |
955 | int i; | |
956 | ||
957 | list_for_each_entry_safe(grp, ptr, &set->list, list) { | |
958 | list_del_init(&grp->list); | |
959 | ||
f88e0c8a MH |
960 | for (i = 0; i < grp->size; i++) { |
961 | if (grp->map & (1 << i)) { | |
962 | u16 rcventry = grp->base + i; | |
06e0ffa6 | 963 | struct tid_rb_node *node; |
f88e0c8a | 964 | |
a92ba6d6 MH |
965 | node = fd->entry_to_rb[rcventry - |
966 | uctxt->expected_base]; | |
967 | if (!node || node->rcventry != rcventry) | |
f88e0c8a | 968 | continue; |
368f2b59 MH |
969 | if (HFI1_CAP_IS_USET(TID_UNMAP)) |
970 | mmu_rb_remove(&fd->tid_rb_root, | |
f19bd643 | 971 | &node->mmu, NULL); |
368f2b59 MH |
972 | else |
973 | hfi1_mmu_rb_remove(&fd->tid_rb_root, | |
974 | &node->mmu); | |
f88e0c8a MH |
975 | clear_tid_node(fd, -1, node); |
976 | } | |
977 | } | |
f88e0c8a MH |
978 | } |
979 | } | |
980 | ||
06e0ffa6 | 981 | static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) |
f727a0c3 | 982 | { |
06e0ffa6 MH |
983 | struct hfi1_filedata *fdata = |
984 | container_of(root, struct hfi1_filedata, tid_rb_root); | |
985 | struct hfi1_ctxtdata *uctxt = fdata->uctxt; | |
986 | struct tid_rb_node *node = | |
987 | container_of(mnode, struct tid_rb_node, mmu); | |
f727a0c3 | 988 | |
06e0ffa6 MH |
989 | if (node->freed) |
990 | return 0; | |
b5eb3b2f | 991 | |
06e0ffa6 MH |
992 | trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr, |
993 | node->rcventry, node->npages, node->dma_addr); | |
994 | node->freed = true; | |
0b091fb3 | 995 | |
06e0ffa6 MH |
996 | spin_lock(&fdata->invalid_lock); |
997 | if (fdata->invalid_tid_idx < uctxt->expected_count) { | |
998 | fdata->invalid_tids[fdata->invalid_tid_idx] = | |
999 | rcventry2tidinfo(node->rcventry - uctxt->expected_base); | |
1000 | fdata->invalid_tids[fdata->invalid_tid_idx] |= | |
1001 | EXP_TID_SET(LEN, node->npages); | |
1002 | if (!fdata->invalid_tid_idx) { | |
1003 | unsigned long *ev; | |
b5eb3b2f | 1004 | |
b5eb3b2f | 1005 | /* |
06e0ffa6 MH |
1006 | * hfi1_set_uevent_bits() sets a user event flag |
1007 | * for all processes. Because calling into the | |
1008 | * driver to process TID cache invalidations is | |
1009 | * expensive and TID cache invalidations are | |
1010 | * handled on a per-process basis, we can | |
1011 | * optimize this to set the flag only for the | |
1012 | * process in question. | |
b5eb3b2f | 1013 | */ |
06e0ffa6 MH |
1014 | ev = uctxt->dd->events + |
1015 | (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * | |
1016 | HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); | |
1017 | set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); | |
b5eb3b2f | 1018 | } |
06e0ffa6 | 1019 | fdata->invalid_tid_idx++; |
b5eb3b2f | 1020 | } |
06e0ffa6 MH |
1021 | spin_unlock(&fdata->invalid_lock); |
1022 | return 0; | |
f727a0c3 MH |
1023 | } |
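The event pointer computed in mmu_rb_invalidate() above (and earlier in hfi1_user_exp_rcv_invalid()) picks one per-subcontext slot out of a flat array: slot = (ctxt - first_user_ctxt) * HFI1_MAX_SHARED_CTXTS + subctxt. A small editor's sketch of that indexing (the slots-per-row count of 8 is an assumed stand-in for HFI1_MAX_SHARED_CTXTS):

#include <assert.h>

#define SKETCH_MAX_SHARED_CTXTS 8

/* One row of slots per user context, one column per subcontext. */
static unsigned sketch_event_slot(unsigned ctxt, unsigned first_user_ctxt,
				  unsigned subctxt)
{
	return (ctxt - first_user_ctxt) * SKETCH_MAX_SHARED_CTXTS + subctxt;
}

int main(void)
{
	/* Third user context (ctxt 5 with first_user_ctxt 3), subcontext 2. */
	assert(sketch_event_slot(5, 3, 2) == 18);
	return 0;
}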
1024 | ||
06e0ffa6 | 1025 | static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) |
f727a0c3 | 1026 | { |
06e0ffa6 MH |
1027 | struct hfi1_filedata *fdata = |
1028 | container_of(root, struct hfi1_filedata, tid_rb_root); | |
1029 | struct tid_rb_node *tnode = | |
1030 | container_of(node, struct tid_rb_node, mmu); | |
a92ba6d6 | 1031 | u32 base = fdata->uctxt->expected_base; |
f727a0c3 | 1032 | |
06e0ffa6 | 1033 | fdata->entry_to_rb[tnode->rcventry - base] = tnode; |
f727a0c3 MH |
1034 | return 0; |
1035 | } | |
1036 | ||
909e2cd0 | 1037 | static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, |
f19bd643 | 1038 | struct mm_struct *mm) |
a92ba6d6 | 1039 | { |
06e0ffa6 MH |
1040 | struct hfi1_filedata *fdata = |
1041 | container_of(root, struct hfi1_filedata, tid_rb_root); | |
1042 | struct tid_rb_node *tnode = | |
1043 | container_of(node, struct tid_rb_node, mmu); | |
a92ba6d6 MH |
1044 | u32 base = fdata->uctxt->expected_base; |
1045 | ||
06e0ffa6 | 1046 | fdata->entry_to_rb[tnode->rcventry - base] = NULL; |
a92ba6d6 | 1047 | } |