Commit | Line | Data |
---|---|---|
51fe6141 JG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. | |
3 | * | |
4 | * The io_pagetable is the top of the data structure that maps IOVAs to PFNs. The |
5 | * PFNs can be placed into an iommu_domain, or returned to the caller as a page | |
6 | * list for access by an in-kernel user. | |
7 | * | |
8 | * The datastructure uses the iopt_pages to optimize the storage of the PFNs | |
9 | * between the domains and xarray. | |
10 | */ | |
11 | #include <linux/iommufd.h> | |
12 | #include <linux/lockdep.h> | |
13 | #include <linux/iommu.h> | |
14 | #include <linux/sched/mm.h> | |
15 | #include <linux/err.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/errno.h> | |
18 | ||
19 | #include "io_pagetable.h" | |
20 | #include "double_span.h" | |
21 | ||
/*
 * One entry on a caller-built pages_list: a byte slice of an iopt_pages
 * together with the iopt_area that will (or did) hold it. Used to stage
 * multi-area map/get operations before ownership moves into the iopt.
 */
struct iopt_pages_list {
	struct iopt_pages *pages;	/* Refcounted page storage for the slice */
	struct iopt_area *area;		/* Area reserved for the slice, or NULL */
	struct list_head next;		/* Link in the caller's pages_list */
	unsigned long start_byte;	/* Byte offset of the slice within pages */
	unsigned long length;		/* Byte length of the slice */
};
29 | ||
30 | struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, | |
31 | struct io_pagetable *iopt, | |
32 | unsigned long iova, | |
33 | unsigned long last_iova) | |
34 | { | |
35 | lockdep_assert_held(&iopt->iova_rwsem); | |
36 | ||
37 | iter->cur_iova = iova; | |
38 | iter->last_iova = last_iova; | |
39 | iter->area = iopt_area_iter_first(iopt, iova, iova); | |
40 | if (!iter->area) | |
41 | return NULL; | |
42 | if (!iter->area->pages) { | |
43 | iter->area = NULL; | |
44 | return NULL; | |
45 | } | |
46 | return iter->area; | |
47 | } | |
48 | ||
49 | struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) | |
50 | { | |
51 | unsigned long last_iova; | |
52 | ||
53 | if (!iter->area) | |
54 | return NULL; | |
55 | last_iova = iopt_area_last_iova(iter->area); | |
56 | if (iter->last_iova <= last_iova) | |
57 | return NULL; | |
58 | ||
59 | iter->cur_iova = last_iova + 1; | |
60 | iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, | |
61 | iter->last_iova); | |
62 | if (!iter->area) | |
63 | return NULL; | |
64 | if (iter->cur_iova != iopt_area_iova(iter->area) || | |
65 | !iter->area->pages) { | |
66 | iter->area = NULL; | |
67 | return NULL; | |
68 | } | |
69 | return iter->area; | |
70 | } | |
71 | ||
72 | static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, | |
73 | unsigned long length, | |
74 | unsigned long iova_alignment, | |
75 | unsigned long page_offset) | |
76 | { | |
77 | if (span->is_used || span->last_hole - span->start_hole < length - 1) | |
78 | return false; | |
79 | ||
80 | span->start_hole = ALIGN(span->start_hole, iova_alignment) | | |
81 | page_offset; | |
82 | if (span->start_hole > span->last_hole || | |
83 | span->last_hole - span->start_hole < length - 1) | |
84 | return false; | |
85 | return true; | |
86 | } | |
87 | ||
88 | static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, | |
89 | unsigned long length, | |
90 | unsigned long iova_alignment, | |
91 | unsigned long page_offset) | |
92 | { | |
93 | if (span->is_hole || span->last_used - span->start_used < length - 1) | |
94 | return false; | |
95 | ||
96 | span->start_used = ALIGN(span->start_used, iova_alignment) | | |
97 | page_offset; | |
98 | if (span->start_used > span->last_used || | |
99 | span->last_used - span->start_used < length - 1) | |
100 | return false; | |
101 | return true; | |
102 | } | |
103 | ||
/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep alignment present in the uptr when building the IOVA, this
	 * increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

	/* Can't produce an IOVA coarser than the table demands */
	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	/* Outer walk: candidate ranges the user has allowed */
	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		/*
		 * No allowed ranges registered means everything (except the
		 * first and last page) is allowed; synthesize one big span.
		 */
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		/* Inner walk: holes not covered by reserved ranges or areas */
		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}
162 | ||
163 | static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, | |
164 | unsigned long length) | |
165 | { | |
166 | unsigned long last; | |
167 | ||
168 | lockdep_assert_held(&iopt->iova_rwsem); | |
169 | ||
170 | if ((iova & (iopt->iova_alignment - 1))) | |
171 | return -EINVAL; | |
172 | ||
173 | if (check_add_overflow(iova, length - 1, &last)) | |
174 | return -EOVERFLOW; | |
175 | ||
176 | /* No reserved IOVA intersects the range */ | |
177 | if (iopt_reserved_iter_first(iopt, iova, last)) | |
178 | return -EINVAL; | |
179 | ||
180 | /* Check that there is not already a mapping in the range */ | |
181 | if (iopt_area_iter_first(iopt, iova, last)) | |
182 | return -EEXIST; | |
183 | return 0; | |
184 | } | |
185 | ||
/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	/* Cannot create a writable mapping over read-only pages */
	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	/* Sub-page offset of the slice; it must also meet the alignment */
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	/* Inclusive range of page indexes within pages covered by the slice */
	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}
223 | ||
/*
 * Allocate an iopt_area for every entry on pages_list and insert each into
 * the area_itree (with NULL pages, reserving the IOVA space). On success
 * *dst_iova is the start of the contiguous range covering the whole list.
 * On failure the caller still owns the elm->area allocations.
 */
static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	/* Allocate all the areas before taking any locks */
	list_for_each_entry(elm, pages_list, next) {
		elm->area = kzalloc(sizeof(*elm->area), GFP_KERNEL_ACCOUNT);
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		/* Self-check (test builds only): the allocated range is valid */
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		/* Caller supplied the IOVA; validate it */
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}
282 | ||
283 | static void iopt_abort_area(struct iopt_area *area) | |
284 | { | |
52f52858 JG |
285 | if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) |
286 | WARN_ON(area->pages); | |
51fe6141 JG |
287 | if (area->iopt) { |
288 | down_write(&area->iopt->iova_rwsem); | |
289 | interval_tree_remove(&area->node, &area->iopt->area_itree); | |
290 | up_write(&area->iopt->iova_rwsem); | |
291 | } | |
292 | kfree(area); | |
293 | } | |
294 | ||
295 | void iopt_free_pages_list(struct list_head *pages_list) | |
296 | { | |
297 | struct iopt_pages_list *elm; | |
298 | ||
299 | while ((elm = list_first_entry_or_null(pages_list, | |
300 | struct iopt_pages_list, next))) { | |
301 | if (elm->area) | |
302 | iopt_abort_area(elm->area); | |
303 | if (elm->pages) | |
304 | iopt_put_pages(elm->pages); | |
305 | list_del(&elm->next); | |
306 | kfree(elm); | |
307 | } | |
308 | } | |
309 | ||
310 | static int iopt_fill_domains_pages(struct list_head *pages_list) | |
311 | { | |
312 | struct iopt_pages_list *undo_elm; | |
313 | struct iopt_pages_list *elm; | |
314 | int rc; | |
315 | ||
316 | list_for_each_entry(elm, pages_list, next) { | |
317 | rc = iopt_area_fill_domains(elm->area, elm->pages); | |
318 | if (rc) | |
319 | goto err_undo; | |
320 | } | |
321 | return 0; | |
322 | ||
323 | err_undo: | |
324 | list_for_each_entry(undo_elm, pages_list, next) { | |
325 | if (undo_elm == elm) | |
326 | break; | |
327 | iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); | |
328 | } | |
329 | return rc; | |
330 | } | |
331 | ||
/*
 * Map every entry on pages_list into the iopt at *dst_iova. On success the
 * area and pages references are moved off the list into the io_pagetable;
 * on failure the list entries keep their references and the caller must
 * clean them up (e.g. with iopt_free_pages_list()).
 */
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	/* Reserve the IOVA space with not-yet-initialized areas */
	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}
365 | ||
/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only setup a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	/* Single-entry list on the stack; iopt_map_pages() consumes it */
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	/* Promote user accounting to mm accounting when the ctx requires it */
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	/* Offset of uptr within the iopt_pages' base user VA */
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		/* On failure any references not consumed remain on elm */
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}
414 | ||
/*
 * Build a pages_list describing [iova, iova + length). Each entry takes a
 * kref on the underlying iopt_pages; the caller must release the list with
 * iopt_free_pages_list(). Fails with -ENOENT unless the whole range is
 * covered by contiguous areas.
 */
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		/* Clamp the slice to the end of the requested range */
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	/* The walk must have covered the whole range without holes */
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}
455 | ||
/*
 * Unmap and destroy every area fully contained in [start, last]. Areas held
 * by in-kernel accesses are asked to let go via iommufd_access_notify_unmap()
 * and the walk restarts from that area; after too many retries on the same
 * area this gives up with -EDEADLOCK.
 */
static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmap's of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		/* Areas must be entirely inside the range, no truncation */
		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		/* Progress was made; the retry counter is per-area */
		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			/* Restart from this area once accesses are dropped */
			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		/* Claim the pages and drop the iova lock for the slow unfill */
		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}
534 | ||
535 | /** | |
536 | * iopt_unmap_iova() - Remove a range of iova | |
537 | * @iopt: io_pagetable to act on | |
538 | * @iova: Starting iova to unmap | |
539 | * @length: Number of bytes to unmap | |
540 | * @unmapped: Return number of bytes unmapped | |
541 | * | |
542 | * The requested range must be a superset of existing ranges. | |
543 | * Splitting/truncating IOVA mappings is not allowed. | |
544 | */ | |
545 | int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, | |
546 | unsigned long length, unsigned long *unmapped) | |
547 | { | |
548 | unsigned long iova_last; | |
549 | ||
550 | if (!length) | |
551 | return -EINVAL; | |
552 | ||
553 | if (check_add_overflow(iova, length - 1, &iova_last)) | |
554 | return -EOVERFLOW; | |
555 | ||
556 | return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); | |
557 | } | |
558 | ||
559 | int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) | |
560 | { | |
561 | int rc; | |
562 | ||
563 | rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); | |
564 | /* If the IOVAs are empty then unmap all succeeds */ | |
565 | if (rc == -ENOENT) | |
566 | return 0; | |
567 | return rc; | |
568 | } | |
569 | ||
570 | /* The caller must always free all the nodes in the allowed_iova rb_root. */ | |
571 | int iopt_set_allow_iova(struct io_pagetable *iopt, | |
572 | struct rb_root_cached *allowed_iova) | |
573 | { | |
574 | struct iopt_allowed *allowed; | |
575 | ||
576 | down_write(&iopt->iova_rwsem); | |
577 | swap(*allowed_iova, iopt->allowed_itree); | |
578 | ||
579 | for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; | |
580 | allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { | |
581 | if (iopt_reserved_iter_first(iopt, allowed->node.start, | |
582 | allowed->node.last)) { | |
583 | swap(*allowed_iova, iopt->allowed_itree); | |
584 | up_write(&iopt->iova_rwsem); | |
585 | return -EADDRINUSE; | |
586 | } | |
587 | } | |
588 | up_write(&iopt->iova_rwsem); | |
589 | return 0; | |
590 | } | |
591 | ||
592 | int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, | |
593 | unsigned long last, void *owner) | |
594 | { | |
595 | struct iopt_reserved *reserved; | |
596 | ||
597 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
598 | ||
599 | if (iopt_area_iter_first(iopt, start, last) || | |
600 | iopt_allowed_iter_first(iopt, start, last)) | |
601 | return -EADDRINUSE; | |
602 | ||
603 | reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); | |
604 | if (!reserved) | |
605 | return -ENOMEM; | |
606 | reserved->node.start = start; | |
607 | reserved->node.last = last; | |
608 | reserved->owner = owner; | |
609 | interval_tree_insert(&reserved->node, &iopt->reserved_itree); | |
610 | return 0; | |
611 | } | |
612 | ||
613 | static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) | |
614 | { | |
615 | struct iopt_reserved *reserved, *next; | |
616 | ||
617 | lockdep_assert_held_write(&iopt->iova_rwsem); | |
618 | ||
619 | for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; | |
620 | reserved = next) { | |
621 | next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); | |
622 | ||
623 | if (reserved->owner == owner) { | |
624 | interval_tree_remove(&reserved->node, | |
625 | &iopt->reserved_itree); | |
626 | kfree(reserved); | |
627 | } | |
628 | } | |
629 | } | |
630 | ||
/* Locked wrapper: drop every reserved range registered by @owner */
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}
637 | ||
638 | void iopt_init_table(struct io_pagetable *iopt) | |
639 | { | |
640 | init_rwsem(&iopt->iova_rwsem); | |
641 | init_rwsem(&iopt->domains_rwsem); | |
642 | iopt->area_itree = RB_ROOT_CACHED; | |
643 | iopt->allowed_itree = RB_ROOT_CACHED; | |
644 | iopt->reserved_itree = RB_ROOT_CACHED; | |
645 | xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); | |
646 | xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); | |
647 | ||
648 | /* | |
649 | * iopt's start as SW tables that can use the entire size_t IOVA space | |
650 | * due to the use of size_t in the APIs. They have no alignment | |
651 | * restriction. | |
652 | */ | |
653 | iopt->iova_alignment = 1; | |
654 | } | |
655 | ||
/* Tear down an io_pagetable; it must already be empty of domains/areas */
void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	/*
	 * NOTE(review): under the test config, reserved ranges with a NULL
	 * owner may remain — presumably left by the selftest; cleared here.
	 */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	/* Free the caller-installed allowed ranges */
	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	/* Every domain, access and area must have been removed by now */
	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}
674 | ||
/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from the
 * domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			/* Hand the storage role over before unmapping */
			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	/* This was the last domain; unfill the pfns entirely */
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}
735 | ||
/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		/* The first domain added becomes the storage domain */
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	/* Unwind in iteration order, stopping at the area that failed */
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		/* Also undo the storage-domain assignment made above */
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}
798 | ||
/* All existing areas conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* IOVA, byte length and sub-page offset must all meet the alignment */
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		/* Self-check: no registered access may need finer alignment */
		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}
827 | ||
/*
 * Attach an iommu_domain to the iopt: reserve the IOVA outside the domain's
 * aperture, fill the domain with all existing areas, then publish it in the
 * domains xarray. On failure everything is unwound and the iopt is unchanged.
 */
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	/* A domain may only be attached once */
	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * A iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	/* Growing the alignment requires all existing areas to conform */
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	/* Pre-reserve the xarray slot so the final store cannot fail */
	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	/* Commit: publish the domain and the (possibly larger) alignment */
	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
905 | ||
/*
 * Recompute iova_alignment as the maximum required by all attached domains
 * and registered accesses. Growing the alignment is only permitted when every
 * existing area already conforms.
 */
static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	/* Each domain needs at least its smallest supported page size */
	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	/* Accesses may also demand a minimum alignment */
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}
941 | ||
/*
 * Remove @domain from the io_pagetable: drop it from the domains xarray,
 * unfill it, release its reserved IOVA ranges and recompute the IOVA
 * alignment without it.
 */
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	/* Locate the xarray index the domain was stored at */
	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	/*
	 * Removing a domain can only lower (or keep) the required alignment,
	 * and iopt_calculate_iova_alignment() only fails when the alignment
	 * grows, so this should never fail.
	 */
	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}
974 | ||
/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 *
 * Return: 0 on success (including the no-op split at either end), -EBUSY if
 * the area has no pages or access is prevented, -EINVAL if the split point is
 * misaligned or the area cannot be split, -ENOMEM on allocation failure.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	/* @iova becomes the last iova of the lhs; rhs starts right after it */
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	/* Splitting exactly at either end leaves the area unchanged */
	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	/* Both halves must stay aligned in IOVA and in page-offset terms */
	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!lhs)
		return -ENOMEM;

	rhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists, we don't track enough
	 * information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/* Swap the original area for the two halves in the interval tree */
	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	/*
	 * lhs takes over the pages reference held by the freed @area; rhs
	 * needs its own reference.
	 */
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages hasn't been
	 * changed
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	/* Put the original area back so the tree is unchanged on failure */
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}
1076 | ||
1077 | int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, | |
1078 | size_t num_iovas) | |
1079 | { | |
1080 | int rc = 0; | |
1081 | int i; | |
1082 | ||
1083 | down_write(&iopt->iova_rwsem); | |
1084 | for (i = 0; i < num_iovas; i++) { | |
1085 | struct iopt_area *area; | |
1086 | ||
1087 | area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); | |
1088 | if (!area) | |
1089 | continue; | |
1090 | rc = iopt_area_split(area, iovas[i]); | |
1091 | if (rc) | |
1092 | break; | |
1093 | } | |
1094 | up_write(&iopt->iova_rwsem); | |
1095 | return rc; | |
1096 | } | |
1097 | ||
1098 | void iopt_enable_large_pages(struct io_pagetable *iopt) | |
1099 | { | |
1100 | int rc; | |
1101 | ||
1102 | down_write(&iopt->domains_rwsem); | |
1103 | down_write(&iopt->iova_rwsem); | |
1104 | WRITE_ONCE(iopt->disable_large_pages, false); | |
1105 | rc = iopt_calculate_iova_alignment(iopt); | |
1106 | WARN_ON(rc); | |
1107 | up_write(&iopt->iova_rwsem); | |
1108 | up_write(&iopt->domains_rwsem); | |
1109 | } | |
1110 | ||
1111 | int iopt_disable_large_pages(struct io_pagetable *iopt) | |
1112 | { | |
1113 | int rc = 0; | |
1114 | ||
1115 | down_write(&iopt->domains_rwsem); | |
1116 | down_write(&iopt->iova_rwsem); | |
1117 | if (iopt->disable_large_pages) | |
1118 | goto out_unlock; | |
1119 | ||
1120 | /* Won't do it if domains already have pages mapped in them */ | |
1121 | if (!xa_empty(&iopt->domains) && | |
1122 | !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { | |
1123 | rc = -EINVAL; | |
1124 | goto out_unlock; | |
1125 | } | |
1126 | ||
1127 | WRITE_ONCE(iopt->disable_large_pages, true); | |
1128 | rc = iopt_calculate_iova_alignment(iopt); | |
1129 | if (rc) | |
1130 | WRITE_ONCE(iopt->disable_large_pages, false); | |
1131 | out_unlock: | |
1132 | up_write(&iopt->iova_rwsem); | |
1133 | up_write(&iopt->domains_rwsem); | |
1134 | return rc; | |
1135 | } | |
1136 | ||
1137 | int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) | |
1138 | { | |
1139 | int rc; | |
1140 | ||
1141 | down_write(&iopt->domains_rwsem); | |
1142 | down_write(&iopt->iova_rwsem); | |
1143 | rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, | |
1144 | xa_limit_16b, GFP_KERNEL_ACCOUNT); | |
1145 | if (rc) | |
1146 | goto out_unlock; | |
1147 | ||
1148 | rc = iopt_calculate_iova_alignment(iopt); | |
1149 | if (rc) { | |
1150 | xa_erase(&iopt->access_list, access->iopt_access_list_id); | |
1151 | goto out_unlock; | |
1152 | } | |
1153 | ||
1154 | out_unlock: | |
1155 | up_write(&iopt->iova_rwsem); | |
1156 | up_write(&iopt->domains_rwsem); | |
1157 | return rc; | |
1158 | } | |
1159 | ||
1160 | void iopt_remove_access(struct io_pagetable *iopt, | |
1161 | struct iommufd_access *access) | |
1162 | { | |
1163 | down_write(&iopt->domains_rwsem); | |
1164 | down_write(&iopt->iova_rwsem); | |
1165 | WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != | |
1166 | access); | |
1167 | WARN_ON(iopt_calculate_iova_alignment(iopt)); | |
1168 | up_write(&iopt->iova_rwsem); | |
1169 | up_write(&iopt->domains_rwsem); | |
1170 | } | |
1171 | ||
34f327a9 JG |
/*
 * Narrow the valid_iova_itree to include reserved ranges from a device.
 *
 * Each reserved region reported for @dev (except the relaxable direct ones)
 * is marked reserved in the io_pagetable so it can never be allocated as
 * user IOVA. When @sw_msi_start is non-NULL the start of a software MSI
 * region, if the driver reports one, is returned through it.
 *
 * Return: 0 on success; on any failure all reservations made for @dev by
 * this call are removed again.
 */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	/* Fault-injection hook for testing the error path */
	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		/*
		 * Relaxable direct regions are skipped, i.e. left usable as
		 * IOVA space.
		 */
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		/* MSI bookkeeping only matters when the caller asked for it */
		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	/* Undo every reservation tagged with @dev, including earlier ones */
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}