// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
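
/*
 * Locking overview (a summary of the rules this file already follows, for
 * orientation): iova_rwsem protects the area_itree, allowed_itree and
 * reserved_itree; domains_rwsem protects the domains xarray and the
 * access_list, and must be held for read whenever an area may have a NULL
 * area->pages (see iopt_unmap_iova_range()).
 */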
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>

#include "io_pagetable.h"
#include "double_span.h"

struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}
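
/*
 * The two functions above are normally used through the
 * iopt_for_each_contig_area() helper (iopt_get_pages() below is an in-file
 * example). The pattern, sketched here for reference, walks every area
 * covering [iova, last_iova] and detects gaps afterwards:
 *
 *	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 *		// use area and iter.cur_iova
 *	}
 *	if (!iopt_area_contig_done(&iter))
 *		// the range was not contiguously covered
 */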
71 | ||
72 | static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, | |
73 | unsigned long length, | |
74 | unsigned long iova_alignment, | |
75 | unsigned long page_offset) | |
76 | { | |
77 | if (span->is_used || span->last_hole - span->start_hole < length - 1) | |
78 | return false; | |
79 | ||
80 | span->start_hole = ALIGN(span->start_hole, iova_alignment) | | |
81 | page_offset; | |
82 | if (span->start_hole > span->last_hole || | |
83 | span->last_hole - span->start_hole < length - 1) | |
84 | return false; | |
85 | return true; | |
86 | } | |
87 | ||
88 | static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, | |
89 | unsigned long length, | |
90 | unsigned long iova_alignment, | |
91 | unsigned long page_offset) | |
92 | { | |
93 | if (span->is_hole || span->last_used - span->start_used < length - 1) | |
94 | return false; | |
95 | ||
96 | span->start_used = ALIGN(span->start_used, iova_alignment) | | |
97 | page_offset; | |
98 | if (span->start_used > span->last_used || | |
99 | span->last_used - span->start_used < length - 1) | |
100 | return false; | |
101 | return true; | |
102 | } | |
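
/*
 * In both helpers above, a span [start, last] fits length bytes iff
 * last - start >= length - 1; phrasing it with "length - 1" avoids overflow
 * when a span reaches the top of the address space. ALIGN() may push the
 * start past the end of the span, which the second check catches, and the
 * OR with page_offset keeps the low bits of the candidate IOVA matching the
 * low bits of the user pointer.
 */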
103 | ||
104 | /* | |
105 | * Automatically find a block of IOVA that is not being used and not reserved. | |
106 | * Does not return a 0 IOVA even if it is valid. | |
107 | */ | |
108 | static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, | |
109 | unsigned long uptr, unsigned long length) | |
110 | { | |
111 | unsigned long page_offset = uptr % PAGE_SIZE; | |
112 | struct interval_tree_double_span_iter used_span; | |
113 | struct interval_tree_span_iter allowed_span; | |
114 | unsigned long iova_alignment; | |
115 | ||
116 | lockdep_assert_held(&iopt->iova_rwsem); | |
117 | ||
118 | /* Protect roundup_pow-of_two() from overflow */ | |
119 | if (length == 0 || length >= ULONG_MAX / 2) | |
120 | return -EOVERFLOW; | |
121 | ||
122 | /* | |
123 | * Keep alignment present in the uptr when building the IOVA, this | |
124 | * increases the chance we can map a THP. | |
125 | */ | |
126 | if (!uptr) | |
127 | iova_alignment = roundup_pow_of_two(length); | |
128 | else | |
129 | iova_alignment = min_t(unsigned long, | |
130 | roundup_pow_of_two(length), | |
131 | 1UL << __ffs64(uptr)); | |
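	/*
	 * Worked example (illustrative numbers): length = 2MiB rounds up to a
	 * 2MiB alignment, but if the lowest set bit of uptr is bit 12 then
	 * 1UL << __ffs64(uptr) is 4KiB and the min() settles on 4KiB, the
	 * best THP-friendly alignment the user buffer itself can support.
	 */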
132 | ||
133 | if (iova_alignment < iopt->iova_alignment) | |
134 | return -EINVAL; | |
135 | ||
136 | interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, | |
137 | PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { | |
138 | if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { | |
139 | allowed_span.start_used = PAGE_SIZE; | |
140 | allowed_span.last_used = ULONG_MAX - PAGE_SIZE; | |
141 | allowed_span.is_hole = false; | |
142 | } | |
143 | ||
144 | if (!__alloc_iova_check_used(&allowed_span, length, | |
145 | iova_alignment, page_offset)) | |
146 | continue; | |
147 | ||
148 | interval_tree_for_each_double_span( | |
149 | &used_span, &iopt->reserved_itree, &iopt->area_itree, | |
150 | allowed_span.start_used, allowed_span.last_used) { | |
151 | if (!__alloc_iova_check_hole(&used_span, length, | |
152 | iova_alignment, | |
153 | page_offset)) | |
154 | continue; | |
155 | ||
156 | *iova = used_span.start_hole; | |
157 | return 0; | |
158 | } | |
159 | } | |
160 | return -ENOSPC; | |
161 | } | |
162 | ||
163 | static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, | |
164 | unsigned long length) | |
165 | { | |
166 | unsigned long last; | |
167 | ||
168 | lockdep_assert_held(&iopt->iova_rwsem); | |
169 | ||
170 | if ((iova & (iopt->iova_alignment - 1))) | |
171 | return -EINVAL; | |
172 | ||
173 | if (check_add_overflow(iova, length - 1, &last)) | |
174 | return -EOVERFLOW; | |
175 | ||
176 | /* No reserved IOVA intersects the range */ | |
177 | if (iopt_reserved_iter_first(iopt, iova, last)) | |
178 | return -EINVAL; | |
179 | ||
180 | /* Check that there is not already a mapping in the range */ | |
181 | if (iopt_area_iter_first(iopt, iova, last)) | |
182 | return -EEXIST; | |
183 | return 0; | |
184 | } | |
185 | ||
186 | /* | |
187 | * The area takes a slice of the pages from start_bytes to start_byte + length | |
188 | */ | |
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}
223 | ||
224 | static int iopt_alloc_area_pages(struct io_pagetable *iopt, | |
225 | struct list_head *pages_list, | |
226 | unsigned long length, unsigned long *dst_iova, | |
227 | int iommu_prot, unsigned int flags) | |
228 | { | |
229 | struct iopt_pages_list *elm; | |
230 | unsigned long iova; | |
231 | int rc = 0; | |
232 | ||
233 | list_for_each_entry(elm, pages_list, next) { | |
234 | elm->area = kzalloc(sizeof(*elm->area), GFP_KERNEL_ACCOUNT); | |
235 | if (!elm->area) | |
236 | return -ENOMEM; | |
237 | } | |
238 | ||
239 | down_write(&iopt->iova_rwsem); | |
240 | if ((length & (iopt->iova_alignment - 1)) || !length) { | |
241 | rc = -EINVAL; | |
242 | goto out_unlock; | |
243 | } | |
244 | ||
245 | if (flags & IOPT_ALLOC_IOVA) { | |
246 | /* Use the first entry to guess the ideal IOVA alignment */ | |
247 | elm = list_first_entry(pages_list, struct iopt_pages_list, | |
248 | next); | |
249 | rc = iopt_alloc_iova( | |
250 | iopt, dst_iova, | |
251 | (uintptr_t)elm->pages->uptr + elm->start_byte, length); | |
252 | if (rc) | |
253 | goto out_unlock; | |
254 | } else { | |
255 | rc = iopt_check_iova(iopt, *dst_iova, length); | |
256 | if (rc) | |
257 | goto out_unlock; | |
258 | } | |
259 | ||
260 | /* | |
261 | * Areas are created with a NULL pages so that the IOVA space is | |
262 | * reserved and we can unlock the iova_rwsem. | |
263 | */ | |
264 | iova = *dst_iova; | |
265 | list_for_each_entry(elm, pages_list, next) { | |
266 | rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, | |
267 | elm->start_byte, elm->length, iommu_prot); | |
268 | if (rc) | |
269 | goto out_unlock; | |
270 | iova += elm->length; | |
271 | } | |
272 | ||
273 | out_unlock: | |
274 | up_write(&iopt->iova_rwsem); | |
275 | return rc; | |
276 | } | |
277 | ||
278 | static void iopt_abort_area(struct iopt_area *area) | |
279 | { | |
280 | if (area->iopt) { | |
281 | down_write(&area->iopt->iova_rwsem); | |
282 | interval_tree_remove(&area->node, &area->iopt->area_itree); | |
283 | up_write(&area->iopt->iova_rwsem); | |
284 | } | |
285 | kfree(area); | |
286 | } | |
287 | ||
288 | void iopt_free_pages_list(struct list_head *pages_list) | |
289 | { | |
290 | struct iopt_pages_list *elm; | |
291 | ||
292 | while ((elm = list_first_entry_or_null(pages_list, | |
293 | struct iopt_pages_list, next))) { | |
294 | if (elm->area) | |
295 | iopt_abort_area(elm->area); | |
296 | if (elm->pages) | |
297 | iopt_put_pages(elm->pages); | |
298 | list_del(&elm->next); | |
299 | kfree(elm); | |
300 | } | |
301 | } | |
302 | ||
303 | static int iopt_fill_domains_pages(struct list_head *pages_list) | |
304 | { | |
305 | struct iopt_pages_list *undo_elm; | |
306 | struct iopt_pages_list *elm; | |
307 | int rc; | |
308 | ||
309 | list_for_each_entry(elm, pages_list, next) { | |
310 | rc = iopt_area_fill_domains(elm->area, elm->pages); | |
311 | if (rc) | |
312 | goto err_undo; | |
313 | } | |
314 | return 0; | |
315 | ||
316 | err_undo: | |
317 | list_for_each_entry(undo_elm, pages_list, next) { | |
318 | if (undo_elm == elm) | |
319 | break; | |
320 | iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); | |
321 | } | |
322 | return rc; | |
323 | } | |
324 | ||
325 | int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, | |
326 | unsigned long length, unsigned long *dst_iova, | |
327 | int iommu_prot, unsigned int flags) | |
328 | { | |
329 | struct iopt_pages_list *elm; | |
330 | int rc; | |
331 | ||
332 | rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, | |
333 | iommu_prot, flags); | |
334 | if (rc) | |
335 | return rc; | |
336 | ||
337 | down_read(&iopt->domains_rwsem); | |
338 | rc = iopt_fill_domains_pages(pages_list); | |
339 | if (rc) | |
340 | goto out_unlock_domains; | |
341 | ||
342 | down_write(&iopt->iova_rwsem); | |
343 | list_for_each_entry(elm, pages_list, next) { | |
344 | /* | |
345 | * area->pages must be set inside the domains_rwsem to ensure | |
346 | * any newly added domains will get filled. Moves the reference | |
347 | * in from the list. | |
348 | */ | |
349 | elm->area->pages = elm->pages; | |
350 | elm->pages = NULL; | |
351 | elm->area = NULL; | |
352 | } | |
353 | up_write(&iopt->iova_rwsem); | |
354 | out_unlock_domains: | |
355 | up_read(&iopt->domains_rwsem); | |
356 | return rc; | |
357 | } | |
358 | ||
359 | /** | |
360 | * iopt_map_user_pages() - Map a user VA to an iova in the io page table | |
361 | * @ictx: iommufd_ctx the iopt is part of | |
362 | * @iopt: io_pagetable to act on | |
363 | * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains | |
364 | * the chosen iova on output. Otherwise is the iova to map to on input | |
365 | * @uptr: User VA to map | |
366 | * @length: Number of bytes to map | |
367 | * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping | |
368 | * @flags: IOPT_ALLOC_IOVA or zero | |
369 | * | |
370 | * iova, uptr, and length must be aligned to iova_alignment. For domain backed | |
371 | * page tables this will pin the pages and load them into the domain at iova. | |
372 | * For non-domain page tables this will only setup a lazy reference and the | |
373 | * caller must use iopt_access_pages() to touch them. | |
374 | * | |
375 | * iopt_unmap_iova() must be called to undo this before the io_pagetable can be | |
376 | * destroyed. | |
377 | */ | |
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}
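
/*
 * A minimal calling sketch for the function above (illustrative, not taken
 * from the original source):
 *
 *	unsigned long iova;
 *	int rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				     IOMMU_READ | IOMMU_WRITE,
 *				     IOPT_ALLOC_IOVA);
 *
 * On success iova holds the allocated address and the mapping must later be
 * removed with iopt_unmap_iova(iopt, iova, length, NULL).
 */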
407 | ||
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}
448 | ||
449 | static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, | |
450 | unsigned long last, unsigned long *unmapped) | |
451 | { | |
452 | struct iopt_area *area; | |
453 | unsigned long unmapped_bytes = 0; | |
454 | int rc = -ENOENT; | |
455 | ||
456 | /* | |
457 | * The domains_rwsem must be held in read mode any time any area->pages | |
458 | * is NULL. This prevents domain attach/detatch from running | |
459 | * concurrently with cleaning up the area. | |
460 | */ | |
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmaps of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);
			/* Later patch calls back to drivers to unmap */
			return -EBUSY;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}
515 | ||
516 | /** | |
517 | * iopt_unmap_iova() - Remove a range of iova | |
518 | * @iopt: io_pagetable to act on | |
519 | * @iova: Starting iova to unmap | |
520 | * @length: Number of bytes to unmap | |
521 | * @unmapped: Return number of bytes unmapped | |
522 | * | |
523 | * The requested range must be a superset of existing ranges. | |
524 | * Splitting/truncating IOVA mappings is not allowed. | |
525 | */ | |
526 | int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, | |
527 | unsigned long length, unsigned long *unmapped) | |
528 | { | |
529 | unsigned long iova_last; | |
530 | ||
531 | if (!length) | |
532 | return -EINVAL; | |
533 | ||
534 | if (check_add_overflow(iova, length - 1, &iova_last)) | |
535 | return -EOVERFLOW; | |
536 | ||
537 | return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); | |
538 | } | |
539 | ||
540 | int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) | |
541 | { | |
542 | int rc; | |
543 | ||
544 | rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); | |
545 | /* If the IOVAs are empty then unmap all succeeds */ | |
546 | if (rc == -ENOENT) | |
547 | return 0; | |
548 | return rc; | |
549 | } | |
550 | ||
551 | /* The caller must always free all the nodes in the allowed_iova rb_root. */ | |
552 | int iopt_set_allow_iova(struct io_pagetable *iopt, | |
553 | struct rb_root_cached *allowed_iova) | |
554 | { | |
555 | struct iopt_allowed *allowed; | |
556 | ||
557 | down_write(&iopt->iova_rwsem); | |
558 | swap(*allowed_iova, iopt->allowed_itree); | |
559 | ||
560 | for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; | |
561 | allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { | |
562 | if (iopt_reserved_iter_first(iopt, allowed->node.start, | |
563 | allowed->node.last)) { | |
564 | swap(*allowed_iova, iopt->allowed_itree); | |
565 | up_write(&iopt->iova_rwsem); | |
566 | return -EADDRINUSE; | |
567 | } | |
568 | } | |
569 | up_write(&iopt->iova_rwsem); | |
570 | return 0; | |
571 | } | |
572 | ||
573 | int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, | |
574 | unsigned long last, void *owner) | |
575 | { | |
576 | struct iopt_reserved *reserved; | |
577 | ||
578 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
579 | ||
580 | if (iopt_area_iter_first(iopt, start, last) || | |
581 | iopt_allowed_iter_first(iopt, start, last)) | |
582 | return -EADDRINUSE; | |
583 | ||
584 | reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); | |
585 | if (!reserved) | |
586 | return -ENOMEM; | |
587 | reserved->node.start = start; | |
588 | reserved->node.last = last; | |
589 | reserved->owner = owner; | |
590 | interval_tree_insert(&reserved->node, &iopt->reserved_itree); | |
591 | return 0; | |
592 | } | |
593 | ||
594 | static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) | |
595 | { | |
596 | struct iopt_reserved *reserved, *next; | |
597 | ||
598 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
599 | ||
600 | for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; | |
601 | reserved = next) { | |
602 | next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); | |
603 | ||
604 | if (reserved->owner == owner) { | |
605 | interval_tree_remove(&reserved->node, | |
606 | &iopt->reserved_itree); | |
607 | kfree(reserved); | |
608 | } | |
609 | } | |
610 | } | |
611 | ||
612 | void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) | |
613 | { | |
614 | down_write(&iopt->iova_rwsem); | |
615 | __iopt_remove_reserved_iova(iopt, owner); | |
616 | up_write(&iopt->iova_rwsem); | |
617 | } | |
618 | ||
619 | void iopt_init_table(struct io_pagetable *iopt) | |
620 | { | |
621 | init_rwsem(&iopt->iova_rwsem); | |
622 | init_rwsem(&iopt->domains_rwsem); | |
623 | iopt->area_itree = RB_ROOT_CACHED; | |
624 | iopt->allowed_itree = RB_ROOT_CACHED; | |
625 | iopt->reserved_itree = RB_ROOT_CACHED; | |
626 | xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); | |
627 | xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); | |
628 | ||
	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}
636 | ||
637 | void iopt_destroy_table(struct io_pagetable *iopt) | |
638 | { | |
639 | struct interval_tree_node *node; | |
640 | ||
641 | while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, | |
642 | ULONG_MAX))) { | |
643 | interval_tree_remove(node, &iopt->allowed_itree); | |
644 | kfree(container_of(node, struct iopt_allowed, node)); | |
645 | } | |
646 | ||
647 | WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root)); | |
648 | WARN_ON(!xa_empty(&iopt->domains)); | |
649 | WARN_ON(!xa_empty(&iopt->access_list)); | |
650 | WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)); | |
651 | } | |
652 | ||
653 | /** | |
654 | * iopt_unfill_domain() - Unfill a domain with PFNs | |
655 | * @iopt: io_pagetable to act on | |
656 | * @domain: domain to unfill | |
657 | * | |
658 | * This is used when removing a domain from the iopt. Every area in the iopt | |
659 | * will be unmapped from the domain. The domain must already be removed from the | |
660 | * domains xarray. | |
661 | */ | |
662 | static void iopt_unfill_domain(struct io_pagetable *iopt, | |
663 | struct iommu_domain *domain) | |
664 | { | |
665 | struct iopt_area *area; | |
666 | ||
667 | lockdep_assert_held(&iopt->iova_rwsem); | |
668 | lockdep_assert_held_write(&iopt->domains_rwsem); | |
669 | ||
670 | /* | |
671 | * Some other domain is holding all the pfns still, rapidly unmap this | |
672 | * domain. | |
673 | */ | |
674 | if (iopt->next_domain_id != 0) { | |
675 | /* Pick an arbitrary remaining domain to act as storage */ | |
676 | struct iommu_domain *storage_domain = | |
677 | xa_load(&iopt->domains, 0); | |
678 | ||
679 | for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; | |
680 | area = iopt_area_iter_next(area, 0, ULONG_MAX)) { | |
681 | struct iopt_pages *pages = area->pages; | |
682 | ||
683 | if (!pages) | |
684 | continue; | |
685 | ||
686 | mutex_lock(&pages->mutex); | |
687 | if (area->storage_domain == domain) | |
688 | area->storage_domain = storage_domain; | |
689 | mutex_unlock(&pages->mutex); | |
690 | ||
691 | iopt_area_unmap_domain(area, domain); | |
692 | } | |
693 | return; | |
694 | } | |
695 | ||
696 | for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; | |
697 | area = iopt_area_iter_next(area, 0, ULONG_MAX)) { | |
698 | struct iopt_pages *pages = area->pages; | |
699 | ||
700 | if (!pages) | |
701 | continue; | |
702 | ||
703 | mutex_lock(&pages->mutex); | |
704 | interval_tree_remove(&area->pages_node, &pages->domains_itree); | |
705 | WARN_ON(area->storage_domain != domain); | |
706 | area->storage_domain = NULL; | |
707 | iopt_area_unfill_domain(area, pages, domain); | |
708 | mutex_unlock(&pages->mutex); | |
709 | } | |
710 | } | |
711 | ||
712 | /** | |
713 | * iopt_fill_domain() - Fill a domain with PFNs | |
714 | * @iopt: io_pagetable to act on | |
715 | * @domain: domain to fill | |
716 | * | |
717 | * Fill the domain with PFNs from every area in the iopt. On failure the domain | |
718 | * is left unchanged. | |
719 | */ | |
720 | static int iopt_fill_domain(struct io_pagetable *iopt, | |
721 | struct iommu_domain *domain) | |
722 | { | |
723 | struct iopt_area *end_area; | |
724 | struct iopt_area *area; | |
725 | int rc; | |
726 | ||
727 | lockdep_assert_held(&iopt->iova_rwsem); | |
728 | lockdep_assert_held_write(&iopt->domains_rwsem); | |
729 | ||
730 | for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; | |
731 | area = iopt_area_iter_next(area, 0, ULONG_MAX)) { | |
732 | struct iopt_pages *pages = area->pages; | |
733 | ||
734 | if (!pages) | |
735 | continue; | |
736 | ||
737 | mutex_lock(&pages->mutex); | |
738 | rc = iopt_area_fill_domain(area, domain); | |
739 | if (rc) { | |
740 | mutex_unlock(&pages->mutex); | |
741 | goto out_unfill; | |
742 | } | |
743 | if (!area->storage_domain) { | |
744 | WARN_ON(iopt->next_domain_id != 0); | |
745 | area->storage_domain = domain; | |
746 | interval_tree_insert(&area->pages_node, | |
747 | &pages->domains_itree); | |
748 | } | |
749 | mutex_unlock(&pages->mutex); | |
750 | } | |
751 | return 0; | |
752 | ||
753 | out_unfill: | |
754 | end_area = area; | |
755 | for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; | |
756 | area = iopt_area_iter_next(area, 0, ULONG_MAX)) { | |
757 | struct iopt_pages *pages = area->pages; | |
758 | ||
759 | if (area == end_area) | |
760 | break; | |
761 | if (!pages) | |
762 | continue; | |
763 | mutex_lock(&pages->mutex); | |
764 | if (iopt->next_domain_id == 0) { | |
765 | interval_tree_remove(&area->pages_node, | |
766 | &pages->domains_itree); | |
767 | area->storage_domain = NULL; | |
768 | } | |
769 | iopt_area_unfill_domain(area, pages, domain); | |
770 | mutex_unlock(&pages->mutex); | |
771 | } | |
772 | return rc; | |
773 | } | |
774 | ||
775 | /* All existing area's conform to an increased page size */ | |
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;
	return 0;
}
793 | ||
794 | int iopt_table_add_domain(struct io_pagetable *iopt, | |
795 | struct iommu_domain *domain) | |
796 | { | |
797 | const struct iommu_domain_geometry *geometry = &domain->geometry; | |
798 | struct iommu_domain *iter_domain; | |
799 | unsigned int new_iova_alignment; | |
800 | unsigned long index; | |
801 | int rc; | |
802 | ||
803 | down_write(&iopt->domains_rwsem); | |
804 | down_write(&iopt->iova_rwsem); | |
805 | ||
806 | xa_for_each(&iopt->domains, index, iter_domain) { | |
807 | if (WARN_ON(iter_domain == domain)) { | |
808 | rc = -EEXIST; | |
809 | goto out_unlock; | |
810 | } | |
811 | } | |
812 | ||
	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* Ensure no area can exist outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
871 | ||
872 | static int iopt_calculate_iova_alignment(struct io_pagetable *iopt) | |
873 | { | |
874 | unsigned long new_iova_alignment; | |
875 | struct iommufd_access *access; | |
876 | struct iommu_domain *domain; | |
877 | unsigned long index; | |
878 | ||
879 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
880 | lockdep_assert_held(&iopt->domains_rwsem); | |
881 | ||
882 | /* See batch_iommu_map_small() */ | |
883 | if (iopt->disable_large_pages) | |
884 | new_iova_alignment = PAGE_SIZE; | |
885 | else | |
886 | new_iova_alignment = 1; | |
887 | ||
888 | xa_for_each(&iopt->domains, index, domain) | |
889 | new_iova_alignment = max_t(unsigned long, | |
890 | 1UL << __ffs(domain->pgsize_bitmap), | |
891 | new_iova_alignment); | |
892 | xa_for_each(&iopt->access_list, index, access) | |
893 | new_iova_alignment = max_t(unsigned long, | |
894 | access->iova_alignment, | |
895 | new_iova_alignment); | |
896 | ||
897 | if (new_iova_alignment > iopt->iova_alignment) { | |
898 | int rc; | |
899 | ||
900 | rc = iopt_check_iova_alignment(iopt, new_iova_alignment); | |
901 | if (rc) | |
902 | return rc; | |
903 | } | |
904 | iopt->iova_alignment = new_iova_alignment; | |
905 | return 0; | |
906 | } | |
907 | ||
908 | void iopt_table_remove_domain(struct io_pagetable *iopt, | |
909 | struct iommu_domain *domain) | |
910 | { | |
911 | struct iommu_domain *iter_domain = NULL; | |
912 | unsigned long index; | |
913 | ||
914 | down_write(&iopt->domains_rwsem); | |
915 | down_write(&iopt->iova_rwsem); | |
916 | ||
917 | xa_for_each(&iopt->domains, index, iter_domain) | |
918 | if (iter_domain == domain) | |
919 | break; | |
920 | if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) | |
921 | goto out_unlock; | |
922 | ||
923 | /* | |
924 | * Compress the xarray to keep it linear by swapping the entry to erase | |
925 | * with the tail entry and shrinking the tail. | |
926 | */ | |
927 | iopt->next_domain_id--; | |
928 | iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); | |
929 | if (index != iopt->next_domain_id) | |
930 | xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); | |
931 | ||
932 | iopt_unfill_domain(iopt, domain); | |
933 | __iopt_remove_reserved_iova(iopt, domain); | |
934 | ||
935 | WARN_ON(iopt_calculate_iova_alignment(iopt)); | |
936 | out_unlock: | |
937 | up_write(&iopt->iova_rwsem); | |
938 | up_write(&iopt->domains_rwsem); | |
939 | } | |
940 | ||
941 | /** | |
942 | * iopt_area_split - Split an area into two parts at iova | |
943 | * @area: The area to split | |
944 | * @iova: Becomes the last of a new area | |
945 | * | |
946 | * This splits an area into two. It is part of the VFIO compatibility to allow | |
947 | * poking a hole in the mapping. The two areas continue to point at the same | |
948 | * iopt_pages, just with different starting bytes. | |
949 | */ | |
950 | static int iopt_area_split(struct iopt_area *area, unsigned long iova) | |
951 | { | |
952 | unsigned long alignment = area->iopt->iova_alignment; | |
953 | unsigned long last_iova = iopt_area_last_iova(area); | |
954 | unsigned long start_iova = iopt_area_iova(area); | |
955 | unsigned long new_start = iova + 1; | |
956 | struct io_pagetable *iopt = area->iopt; | |
957 | struct iopt_pages *pages = area->pages; | |
958 | struct iopt_area *lhs; | |
959 | struct iopt_area *rhs; | |
960 | int rc; | |
961 | ||
962 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
963 | ||
964 | if (iova == start_iova || iova == last_iova) | |
965 | return 0; | |
966 | ||
967 | if (!pages || area->prevent_access) | |
968 | return -EBUSY; | |
969 | ||
970 | if (new_start & (alignment - 1) || | |
971 | iopt_area_start_byte(area, new_start) & (alignment - 1)) | |
972 | return -EINVAL; | |
973 | ||
974 | lhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); | |
975 | if (!lhs) | |
976 | return -ENOMEM; | |
977 | ||
978 | rhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); | |
979 | if (!rhs) { | |
980 | rc = -ENOMEM; | |
981 | goto err_free_lhs; | |
982 | } | |
983 | ||
984 | mutex_lock(&pages->mutex); | |
985 | /* | |
986 | * Splitting is not permitted if an access exists, we don't track enough | |
987 | * information to split existing accesses. | |
988 | */ | |
989 | if (area->num_accesses) { | |
990 | rc = -EINVAL; | |
991 | goto err_unlock; | |
992 | } | |
993 | ||
994 | /* | |
995 | * Splitting is not permitted if a domain could have been mapped with | |
996 | * huge pages. | |
997 | */ | |
998 | if (area->storage_domain && !iopt->disable_large_pages) { | |
999 | rc = -EINVAL; | |
1000 | goto err_unlock; | |
1001 | } | |
1002 | ||
1003 | interval_tree_remove(&area->node, &iopt->area_itree); | |
1004 | rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, | |
1005 | iopt_area_start_byte(area, start_iova), | |
1006 | (new_start - 1) - start_iova + 1, | |
1007 | area->iommu_prot); | |
1008 | if (WARN_ON(rc)) | |
1009 | goto err_insert; | |
1010 | ||
1011 | rc = iopt_insert_area(iopt, rhs, area->pages, new_start, | |
1012 | iopt_area_start_byte(area, new_start), | |
1013 | last_iova - new_start + 1, area->iommu_prot); | |
1014 | if (WARN_ON(rc)) | |
1015 | goto err_remove_lhs; | |
1016 | ||
1017 | lhs->storage_domain = area->storage_domain; | |
1018 | lhs->pages = area->pages; | |
1019 | rhs->storage_domain = area->storage_domain; | |
1020 | rhs->pages = area->pages; | |
1021 | kref_get(&rhs->pages->kref); | |
1022 | kfree(area); | |
1023 | mutex_unlock(&pages->mutex); | |
1024 | ||
	/*
	 * No change to domains or accesses because the pages have not been
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}
1042 | ||
1043 | int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, | |
1044 | size_t num_iovas) | |
1045 | { | |
1046 | int rc = 0; | |
1047 | int i; | |
1048 | ||
1049 | down_write(&iopt->iova_rwsem); | |
1050 | for (i = 0; i < num_iovas; i++) { | |
1051 | struct iopt_area *area; | |
1052 | ||
1053 | area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); | |
1054 | if (!area) | |
1055 | continue; | |
1056 | rc = iopt_area_split(area, iovas[i]); | |
1057 | if (rc) | |
1058 | break; | |
1059 | } | |
1060 | up_write(&iopt->iova_rwsem); | |
1061 | return rc; | |
1062 | } | |
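
/*
 * Note: iopt_area_split() refuses to split an area whose storage domain may
 * have been mapped with huge pages, so users of iopt_cut_iova() are expected
 * to have large pages disabled (see iopt_disable_large_pages() below).
 */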
1063 | ||
1064 | void iopt_enable_large_pages(struct io_pagetable *iopt) | |
1065 | { | |
1066 | int rc; | |
1067 | ||
1068 | down_write(&iopt->domains_rwsem); | |
1069 | down_write(&iopt->iova_rwsem); | |
1070 | WRITE_ONCE(iopt->disable_large_pages, false); | |
1071 | rc = iopt_calculate_iova_alignment(iopt); | |
1072 | WARN_ON(rc); | |
1073 | up_write(&iopt->iova_rwsem); | |
1074 | up_write(&iopt->domains_rwsem); | |
1075 | } | |
1076 | ||
1077 | int iopt_disable_large_pages(struct io_pagetable *iopt) | |
1078 | { | |
1079 | int rc = 0; | |
1080 | ||
1081 | down_write(&iopt->domains_rwsem); | |
1082 | down_write(&iopt->iova_rwsem); | |
1083 | if (iopt->disable_large_pages) | |
1084 | goto out_unlock; | |
1085 | ||
1086 | /* Won't do it if domains already have pages mapped in them */ | |
1087 | if (!xa_empty(&iopt->domains) && | |
1088 | !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { | |
1089 | rc = -EINVAL; | |
1090 | goto out_unlock; | |
1091 | } | |
1092 | ||
1093 | WRITE_ONCE(iopt->disable_large_pages, true); | |
1094 | rc = iopt_calculate_iova_alignment(iopt); | |
1095 | if (rc) | |
1096 | WRITE_ONCE(iopt->disable_large_pages, false); | |
1097 | out_unlock: | |
1098 | up_write(&iopt->iova_rwsem); | |
1099 | up_write(&iopt->domains_rwsem); | |
1100 | return rc; | |
1101 | } | |
1102 | ||
1103 | int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) | |
1104 | { | |
1105 | int rc; | |
1106 | ||
1107 | down_write(&iopt->domains_rwsem); | |
1108 | down_write(&iopt->iova_rwsem); | |
1109 | rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, | |
1110 | xa_limit_16b, GFP_KERNEL_ACCOUNT); | |
1111 | if (rc) | |
1112 | goto out_unlock; | |
1113 | ||
1114 | rc = iopt_calculate_iova_alignment(iopt); | |
1115 | if (rc) { | |
1116 | xa_erase(&iopt->access_list, access->iopt_access_list_id); | |
1117 | goto out_unlock; | |
1118 | } | |
1119 | ||
1120 | out_unlock: | |
1121 | up_write(&iopt->iova_rwsem); | |
1122 | up_write(&iopt->domains_rwsem); | |
1123 | return rc; | |
1124 | } | |
1125 | ||
1126 | void iopt_remove_access(struct io_pagetable *iopt, | |
1127 | struct iommufd_access *access) | |
1128 | { | |
1129 | down_write(&iopt->domains_rwsem); | |
1130 | down_write(&iopt->iova_rwsem); | |
1131 | WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != | |
1132 | access); | |
1133 | WARN_ON(iopt_calculate_iova_alignment(iopt)); | |
1134 | up_write(&iopt->iova_rwsem); | |
1135 | up_write(&iopt->domains_rwsem); | |
1136 | } | |
1137 | ||
1138 | /* Narrow the valid_iova_itree to include reserved ranges from a group. */ | |
int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
					  struct device *device,
					  struct iommu_group *group,
					  phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	struct iommu_resv_region *tmp;
	LIST_HEAD(group_resv_regions);
	int rc;

	down_write(&iopt->iova_rwsem);
	rc = iommu_get_group_resv_regions(group, &group_resv_regions);
	if (rc)
		goto out_unlock;

	list_for_each_entry(resv, &group_resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		/*
		 * The presence of any 'real' MSI regions should take precedence
		 * over the software-managed one if the IOMMU driver happens to
		 * advertise both types.
		 */
		if (sw_msi_start && resv->type == IOMMU_RESV_MSI) {
			*sw_msi_start = 0;
			sw_msi_start = NULL;
		}
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI)
			*sw_msi_start = resv->start;

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, device);
		if (rc)
			goto out_reserved;
	}
	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, device);
out_free_resv:
	list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
		kfree(resv);
out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}