// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The io_pagetable is the top of the data structure that maps IOVAs to PFNs.
 * The PFNs can be placed into an iommu_domain, or returned to the caller as a
 * page list for access by an in-kernel user.
 *
 * The data structure uses iopt_pages to optimize the storage of the PFNs
 * between the domains and the xarray.
 */
#include <linux/iommufd.h>
#include <linux/lockdep.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <uapi/linux/iommufd.h>

#include "io_pagetable.h"
#include "double_span.h"
struct iopt_pages_list {
	struct iopt_pages *pages;
	struct iopt_area *area;
	struct list_head next;
	unsigned long start_byte;
	unsigned long length;
};

struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova)
{
	lockdep_assert_held(&iopt->iova_rwsem);

	iter->cur_iova = iova;
	iter->last_iova = last_iova;
	iter->area = iopt_area_iter_first(iopt, iova, iova);
	if (!iter->area)
		return NULL;
	if (!iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
{
	unsigned long last_iova;

	if (!iter->area)
		return NULL;
	last_iova = iopt_area_last_iova(iter->area);
	if (iter->last_iova <= last_iova)
		return NULL;

	iter->cur_iova = last_iova + 1;
	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
					 iter->last_iova);
	if (!iter->area)
		return NULL;
	if (iter->cur_iova != iopt_area_iova(iter->area) ||
	    !iter->area->pages) {
		iter->area = NULL;
		return NULL;
	}
	return iter->area;
}
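
/*
 * Illustrative usage sketch (not part of the original file): these two
 * helpers back the iopt_for_each_contig_area() iterator from io_pagetable.h.
 * A caller that must confirm [iova, last_iova] is fully covered by mapped
 * areas loops and then checks iopt_area_contig_done():
 *
 *	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 *		unsigned long last = min(last_iova,
 *					 iopt_area_last_iova(area));
 *
 *		// operate on [iter.cur_iova, last] inside this area
 *	}
 *	if (!iopt_area_contig_done(&iter))
 *		return -ENOENT;
 *
 * This is the same pattern iopt_get_pages() below uses.
 */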

static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_used || span->last_hole - span->start_hole < length - 1)
		return false;

	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
			   page_offset;
	if (span->start_hole > span->last_hole ||
	    span->last_hole - span->start_hole < length - 1)
		return false;
	return true;
}

static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
				    unsigned long length,
				    unsigned long iova_alignment,
				    unsigned long page_offset)
{
	if (span->is_hole || span->last_used - span->start_used < length - 1)
		return false;

	span->start_used = ALIGN(span->start_used, iova_alignment) |
			   page_offset;
	if (span->start_used > span->last_used ||
	    span->last_used - span->start_used < length - 1)
		return false;
	return true;
}

/*
 * Automatically find a block of IOVA that is not being used and not reserved.
 * Does not return a 0 IOVA even if it is valid.
 */
static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
			   unsigned long uptr, unsigned long length)
{
	unsigned long page_offset = uptr % PAGE_SIZE;
	struct interval_tree_double_span_iter used_span;
	struct interval_tree_span_iter allowed_span;
	unsigned long iova_alignment;

	lockdep_assert_held(&iopt->iova_rwsem);

	/* Protect roundup_pow_of_two() from overflow */
	if (length == 0 || length >= ULONG_MAX / 2)
		return -EOVERFLOW;

	/*
	 * Keep alignment present in the uptr when building the IOVA, this
	 * increases the chance we can map a THP.
	 */
	if (!uptr)
		iova_alignment = roundup_pow_of_two(length);
	else
		iova_alignment = min_t(unsigned long,
				       roundup_pow_of_two(length),
				       1UL << __ffs64(uptr));

	if (iova_alignment < iopt->iova_alignment)
		return -EINVAL;

	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
			allowed_span.start_used = PAGE_SIZE;
			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
			allowed_span.is_hole = false;
		}

		if (!__alloc_iova_check_used(&allowed_span, length,
					     iova_alignment, page_offset))
			continue;

		interval_tree_for_each_double_span(
			&used_span, &iopt->reserved_itree, &iopt->area_itree,
			allowed_span.start_used, allowed_span.last_used) {
			if (!__alloc_iova_check_hole(&used_span, length,
						     iova_alignment,
						     page_offset))
				continue;

			*iova = used_span.start_hole;
			return 0;
		}
	}
	return -ENOSPC;
}
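
/*
 * Worked example for the alignment logic above (illustrative only): with
 * uptr == 0x7f1200200000 and length == 0x400000, roundup_pow_of_two(length)
 * is 0x400000 while 1UL << __ffs64(uptr) is 0x200000, so iova_alignment
 * becomes 0x200000. The allocator then returns a 2MiB-aligned IOVA matching
 * the user VA alignment, so a PMD-sized THP backing the buffer can be mapped
 * with a single huge IOPTE.
 */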

static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
			   unsigned long length)
{
	unsigned long last;

	lockdep_assert_held(&iopt->iova_rwsem);

	if ((iova & (iopt->iova_alignment - 1)))
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &last))
		return -EOVERFLOW;

	/* No reserved IOVA intersects the range */
	if (iopt_reserved_iter_first(iopt, iova, last))
		return -EINVAL;

	/* Check that there is not already a mapping in the range */
	if (iopt_area_iter_first(iopt, iova, last))
		return -EEXIST;
	return 0;
}

/*
 * The area takes a slice of the pages from start_byte to start_byte + length
 */
static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
			    struct iopt_pages *pages, unsigned long iova,
			    unsigned long start_byte, unsigned long length,
			    int iommu_prot)
{
	lockdep_assert_held_write(&iopt->iova_rwsem);

	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
		return -EPERM;

	area->iommu_prot = iommu_prot;
	area->page_offset = start_byte % PAGE_SIZE;
	if (area->page_offset & (iopt->iova_alignment - 1))
		return -EINVAL;

	area->node.start = iova;
	if (check_add_overflow(iova, length - 1, &area->node.last))
		return -EOVERFLOW;

	area->pages_node.start = start_byte / PAGE_SIZE;
	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
		return -EOVERFLOW;
	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
	if (WARN_ON(area->pages_node.last >= pages->npages))
		return -EOVERFLOW;

	/*
	 * The area is inserted with a NULL pages indicating it is not fully
	 * initialized yet.
	 */
	area->iopt = iopt;
	interval_tree_insert(&area->node, &iopt->area_itree);
	return 0;
}

static struct iopt_area *iopt_area_alloc(void)
{
	struct iopt_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
	if (!area)
		return NULL;
	RB_CLEAR_NODE(&area->node.rb);
	RB_CLEAR_NODE(&area->pages_node.rb);
	return area;
}

static int iopt_alloc_area_pages(struct io_pagetable *iopt,
				 struct list_head *pages_list,
				 unsigned long length, unsigned long *dst_iova,
				 int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	unsigned long iova;
	int rc = 0;

	list_for_each_entry(elm, pages_list, next) {
		elm->area = iopt_area_alloc();
		if (!elm->area)
			return -ENOMEM;
	}

	down_write(&iopt->iova_rwsem);
	if ((length & (iopt->iova_alignment - 1)) || !length) {
		rc = -EINVAL;
		goto out_unlock;
	}

	if (flags & IOPT_ALLOC_IOVA) {
		/* Use the first entry to guess the ideal IOVA alignment */
		elm = list_first_entry(pages_list, struct iopt_pages_list,
				       next);
		rc = iopt_alloc_iova(
			iopt, dst_iova,
			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
		if (rc)
			goto out_unlock;
		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
			rc = -EINVAL;
			goto out_unlock;
		}
	} else {
		rc = iopt_check_iova(iopt, *dst_iova, length);
		if (rc)
			goto out_unlock;
	}

	/*
	 * Areas are created with a NULL pages so that the IOVA space is
	 * reserved and we can unlock the iova_rwsem.
	 */
	iova = *dst_iova;
	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
				      elm->start_byte, elm->length, iommu_prot);
		if (rc)
			goto out_unlock;
		iova += elm->length;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	return rc;
}

static void iopt_abort_area(struct iopt_area *area)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(area->pages);
	if (area->iopt) {
		down_write(&area->iopt->iova_rwsem);
		interval_tree_remove(&area->node, &area->iopt->area_itree);
		up_write(&area->iopt->iova_rwsem);
	}
	kfree(area);
}

void iopt_free_pages_list(struct list_head *pages_list)
{
	struct iopt_pages_list *elm;

	while ((elm = list_first_entry_or_null(pages_list,
					       struct iopt_pages_list, next))) {
		if (elm->area)
			iopt_abort_area(elm->area);
		if (elm->pages)
			iopt_put_pages(elm->pages);
		list_del(&elm->next);
		kfree(elm);
	}
}

static int iopt_fill_domains_pages(struct list_head *pages_list)
{
	struct iopt_pages_list *undo_elm;
	struct iopt_pages_list *elm;
	int rc;

	list_for_each_entry(elm, pages_list, next) {
		rc = iopt_area_fill_domains(elm->area, elm->pages);
		if (rc)
			goto err_undo;
	}
	return 0;

err_undo:
	list_for_each_entry(undo_elm, pages_list, next) {
		if (undo_elm == elm)
			break;
		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
	}
	return rc;
}

int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags)
{
	struct iopt_pages_list *elm;
	int rc;

	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
				   iommu_prot, flags);
	if (rc)
		return rc;

	down_read(&iopt->domains_rwsem);
	rc = iopt_fill_domains_pages(pages_list);
	if (rc)
		goto out_unlock_domains;

	down_write(&iopt->iova_rwsem);
	list_for_each_entry(elm, pages_list, next) {
		/*
		 * area->pages must be set inside the domains_rwsem to ensure
		 * any newly added domains will get filled. Moves the reference
		 * in from the list.
		 */
		elm->area->pages = elm->pages;
		elm->pages = NULL;
		elm->area = NULL;
	}
	up_write(&iopt->iova_rwsem);
out_unlock_domains:
	up_read(&iopt->domains_rwsem);
	return rc;
}

/**
 * iopt_map_user_pages() - Map a user VA to an iova in the io page table
 * @ictx: iommufd_ctx the iopt is part of
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise is the iova to map to on input
 * @uptr: User VA to map
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 *
 * iova, uptr, and length must be aligned to iova_alignment. For domain backed
 * page tables this will pin the pages and load them into the domain at iova.
 * For non-domain page tables this will only set up a lazy reference and the
 * caller must use iopt_access_pages() to touch them.
 *
 * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
 * destroyed.
 */
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages_list elm = {};
	LIST_HEAD(pages_list);
	int rc;

	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
	if (IS_ERR(elm.pages))
		return PTR_ERR(elm.pages);
	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
	elm.start_byte = uptr - elm.pages->uptr;
	elm.length = length;
	list_add(&elm.next, &pages_list);

	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
	if (rc) {
		if (elm.area)
			iopt_abort_area(elm.area);
		if (elm.pages)
			iopt_put_pages(elm.pages);
		return rc;
	}
	return 0;
}
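
/*
 * Usage sketch (hypothetical caller, not part of this file): letting the
 * allocator choose the IOVA for a user buffer, then undoing it later:
 *
 *	unsigned long iova;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (rc)
 *		return rc;
 *	// ... DMA happens ...
 *	rc = iopt_unmap_iova(iopt, iova, length, NULL);
 */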

struct iova_bitmap_fn_arg {
	unsigned long flags;
	struct io_pagetable *iopt;
	struct iommu_domain *domain;
	struct iommu_dirty_bitmap *dirty;
};

static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iopt_area *area;
	struct iopt_area_contig_iter iter;
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	struct iommu_dirty_bitmap *dirty = arg->dirty;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long last_iova = iova + length - 1;
	unsigned long flags = arg->flags;
	int ret;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		ret = ops->read_and_clear_dirty(domain, iter.cur_iova,
						last - iter.cur_iova + 1, flags,
						dirty);
		if (ret)
			return ret;
	}

	if (!iopt_area_contig_done(&iter))
		return -EINVAL;
	return 0;
}

static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret = 0;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.flags = flags;
	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;
	iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	if (!(flags & IOMMU_DIRTY_NO_CLEAR))
		iommu_iotlb_sync(domain, &gather);

	iova_bitmap_free(iter);

	return ret;
}

int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	if (!bitmap->page_size)
		return -EINVAL;

	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int ret;

	ret = iommufd_check_iova_range(iopt, bitmap);
	if (ret)
		return ret;

	down_read(&iopt->iova_rwsem);
	ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
	up_read(&iopt->iova_rwsem);

	return ret;
}
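
/*
 * Illustrative sketch (hypothetical ioctl caller): for a 4K dirty granule
 * over a 1GiB range the user supplies a 1G/4K/8 == 32KiB bitmap; only the
 * fields this file consumes are shown:
 *
 *	struct iommu_hwpt_get_dirty_bitmap bitmap = {
 *		.iova = 0x40000000,
 *		.length = SZ_1G,
 *		.page_size = SZ_4K,
 *		.data = (uintptr_t)user_bitmap,
 *	};
 *
 *	ret = iopt_read_and_clear_dirty_data(iopt, domain, 0, &bitmap);
 */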

static int iopt_clear_dirty_data(struct io_pagetable *iopt,
				 struct iommu_domain *domain)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iopt_area *area;
	int ret = 0;

	lockdep_assert_held_read(&iopt->iova_rwsem);

	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		if (!area->pages)
			continue;

		ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area),
						iopt_area_length(area), 0,
						&dirty);
		if (ret)
			break;
	}

	iommu_iotlb_sync(domain, &gather);
	return ret;
}

int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	int ret = 0;

	if (!ops)
		return -EOPNOTSUPP;

	down_read(&iopt->iova_rwsem);

	/* Clear dirty bits from PTEs to ensure a clean snapshot */
	if (enable) {
		ret = iopt_clear_dirty_data(iopt, domain);
		if (ret)
			goto out_unlock;
	}

	ret = ops->set_dirty_tracking(domain, enable);

out_unlock:
	up_read(&iopt->iova_rwsem);
	return ret;
}
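
/*
 * Usage sketch (hypothetical caller): a live-migration style sequence pairs
 * the two entry points above; flags == 0 means dirty bits are cleared as
 * they are reported:
 *
 *	rc = iopt_set_dirty_tracking(iopt, domain, true);
 *	// ... device runs, hardware logs dirtied IOVAs ...
 *	rc = iopt_read_and_clear_dirty_data(iopt, domain, 0, &bitmap);
 *	// ... transfer the reported pages, repeat, then ...
 *	rc = iopt_set_dirty_tracking(iopt, domain, false);
 */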

int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list)
{
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		struct iopt_pages_list *elm;
		unsigned long last = min(last_iova, iopt_area_last_iova(area));

		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
		if (!elm) {
			rc = -ENOMEM;
			goto err_free;
		}
		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
		elm->pages = area->pages;
		elm->length = (last - iter.cur_iova) + 1;
		kref_get(&elm->pages->kref);
		list_add_tail(&elm->next, pages_list);
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_free;
	}
	up_read(&iopt->iova_rwsem);
	return 0;
err_free:
	up_read(&iopt->iova_rwsem);
	iopt_free_pages_list(pages_list);
	return rc;
}

static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
				 unsigned long last, unsigned long *unmapped)
{
	struct iopt_area *area;
	unsigned long unmapped_bytes = 0;
	unsigned int tries = 0;
	int rc = -ENOENT;

	/*
	 * The domains_rwsem must be held in read mode any time any area->pages
	 * is NULL. This prevents domain attach/detach from running
	 * concurrently with cleaning up the area.
	 */
again:
	down_read(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	while ((area = iopt_area_iter_first(iopt, start, last))) {
		unsigned long area_last = iopt_area_last_iova(area);
		unsigned long area_first = iopt_area_iova(area);
		struct iopt_pages *pages;

		/* Userspace should not race map/unmaps of the same area */
		if (!area->pages) {
			rc = -EBUSY;
			goto out_unlock_iova;
		}

		if (area_first < start || area_last > last) {
			rc = -ENOENT;
			goto out_unlock_iova;
		}

		if (area_first != start)
			tries = 0;

		/*
		 * num_accesses writers must hold the iova_rwsem too, so we can
		 * safely read it under the write side of the iova_rwsem
		 * without the pages->mutex.
		 */
		if (area->num_accesses) {
			size_t length = iopt_area_length(area);

			start = area_first;
			area->prevent_access = true;
			up_write(&iopt->iova_rwsem);
			up_read(&iopt->domains_rwsem);

			iommufd_access_notify_unmap(iopt, area_first, length);
			/* Something is not responding to unmap requests. */
			tries++;
			if (WARN_ON(tries > 100))
				return -EDEADLOCK;
			goto again;
		}

		pages = area->pages;
		area->pages = NULL;
		up_write(&iopt->iova_rwsem);

		iopt_area_unfill_domains(area, pages);
		iopt_abort_area(area);
		iopt_put_pages(pages);

		unmapped_bytes += area_last - area_first + 1;

		down_write(&iopt->iova_rwsem);
	}
	if (unmapped_bytes)
		rc = 0;

out_unlock_iova:
	up_write(&iopt->iova_rwsem);
	up_read(&iopt->domains_rwsem);
	if (unmapped)
		*unmapped = unmapped_bytes;
	return rc;
}

/**
 * iopt_unmap_iova() - Remove a range of iova
 * @iopt: io_pagetable to act on
 * @iova: Starting iova to unmap
 * @length: Number of bytes to unmap
 * @unmapped: Return number of bytes unmapped
 *
 * The requested range must be a superset of existing ranges.
 * Splitting/truncating IOVA mappings is not allowed.
 */
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped)
{
	unsigned long iova_last;

	if (!length)
		return -EINVAL;

	if (check_add_overflow(iova, length - 1, &iova_last))
		return -EOVERFLOW;

	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
}

int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
{
	int rc;

	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
	/* If the IOVAs are empty then unmap all succeeds */
	if (rc == -ENOENT)
		return 0;
	return rc;
}

/* The caller must always free all the nodes in the allowed_iova rb_root. */
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova)
{
	struct iopt_allowed *allowed;

	down_write(&iopt->iova_rwsem);
	swap(*allowed_iova, iopt->allowed_itree);

	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
		if (iopt_reserved_iter_first(iopt, allowed->node.start,
					     allowed->node.last)) {
			swap(*allowed_iova, iopt->allowed_itree);
			up_write(&iopt->iova_rwsem);
			return -EADDRINUSE;
		}
	}
	up_write(&iopt->iova_rwsem);
	return 0;
}

int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner)
{
	struct iopt_reserved *reserved;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iopt_area_iter_first(iopt, start, last) ||
	    iopt_allowed_iter_first(iopt, start, last))
		return -EADDRINUSE;

	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
	if (!reserved)
		return -ENOMEM;
	reserved->node.start = start;
	reserved->node.last = last;
	reserved->owner = owner;
	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
	return 0;
}

static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	struct iopt_reserved *reserved, *next;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
	     reserved = next) {
		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);

		if (reserved->owner == owner) {
			interval_tree_remove(&reserved->node,
					     &iopt->reserved_itree);
			kfree(reserved);
		}
	}
}

void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
{
	down_write(&iopt->iova_rwsem);
	__iopt_remove_reserved_iova(iopt, owner);
	up_write(&iopt->iova_rwsem);
}

void iopt_init_table(struct io_pagetable *iopt)
{
	init_rwsem(&iopt->iova_rwsem);
	init_rwsem(&iopt->domains_rwsem);
	iopt->area_itree = RB_ROOT_CACHED;
	iopt->allowed_itree = RB_ROOT_CACHED;
	iopt->reserved_itree = RB_ROOT_CACHED;
	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);

	/*
	 * iopts start as SW tables that can use the entire size_t IOVA space
	 * due to the use of size_t in the APIs. They have no alignment
	 * restriction.
	 */
	iopt->iova_alignment = 1;
}

void iopt_destroy_table(struct io_pagetable *iopt)
{
	struct interval_tree_node *node;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		iopt_remove_reserved_iova(iopt, NULL);

	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
						ULONG_MAX))) {
		interval_tree_remove(node, &iopt->allowed_itree);
		kfree(container_of(node, struct iopt_allowed, node));
	}

	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
	WARN_ON(!xa_empty(&iopt->domains));
	WARN_ON(!xa_empty(&iopt->access_list));
	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
}
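
/*
 * Lifecycle sketch (illustrative; the owning "ioas" object is hypothetical
 * here): an owner embeds a struct io_pagetable, initializes it once, and
 * must have unmapped everything and detached all domains/accesses before
 * destroying:
 *
 *	iopt_init_table(&ioas->iopt);
 *	// ... map, attach domains, add accesses ...
 *	iopt_unmap_all(&ioas->iopt, NULL);
 *	iopt_destroy_table(&ioas->iopt);
 */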

/**
 * iopt_unfill_domain() - Unfill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to unfill
 *
 * This is used when removing a domain from the iopt. Every area in the iopt
 * will be unmapped from the domain. The domain must already be removed from the
 * domains xarray.
 */
static void iopt_unfill_domain(struct io_pagetable *iopt,
			       struct iommu_domain *domain)
{
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	/*
	 * Some other domain is holding all the pfns still, rapidly unmap this
	 * domain.
	 */
	if (iopt->next_domain_id != 0) {
		/* Pick an arbitrary remaining domain to act as storage */
		struct iommu_domain *storage_domain =
			xa_load(&iopt->domains, 0);

		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
			struct iopt_pages *pages = area->pages;

			if (!pages)
				continue;

			mutex_lock(&pages->mutex);
			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			mutex_unlock(&pages->mutex);

			iopt_area_unmap_domain(area, domain);
		}
		return;
	}

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
}

/**
 * iopt_fill_domain() - Fill a domain with PFNs
 * @iopt: io_pagetable to act on
 * @domain: domain to fill
 *
 * Fill the domain with PFNs from every area in the iopt. On failure the domain
 * is left unchanged.
 */
static int iopt_fill_domain(struct io_pagetable *iopt,
			    struct iommu_domain *domain)
{
	struct iopt_area *end_area;
	struct iopt_area *area;
	int rc;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held_write(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			goto out_unfill;
		}
		if (!area->storage_domain) {
			WARN_ON(iopt->next_domain_id != 0);
			area->storage_domain = domain;
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

out_unfill:
	end_area = area;
	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
		struct iopt_pages *pages = area->pages;

		if (area == end_area)
			break;
		if (!pages)
			continue;
		mutex_lock(&pages->mutex);
		if (iopt->next_domain_id == 0) {
			interval_tree_remove(&area->pages_node,
					     &pages->domains_itree);
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
}

/* All existing areas must conform to an increased page size */
static int iopt_check_iova_alignment(struct io_pagetable *iopt,
				     unsigned long new_iova_alignment)
{
	unsigned long align_mask = new_iova_alignment - 1;
	struct iopt_area *area;

	lockdep_assert_held(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
		if ((iopt_area_iova(area) & align_mask) ||
		    (iopt_area_length(area) & align_mask) ||
		    (area->page_offset & align_mask))
			return -EADDRINUSE;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
		struct iommufd_access *access;
		unsigned long index;

		xa_for_each(&iopt->access_list, index, access)
			if (WARN_ON(access->iova_alignment >
				    new_iova_alignment))
				return -EADDRINUSE;
	}
	return 0;
}

int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain)
{
	const struct iommu_domain_geometry *geometry = &domain->geometry;
	struct iommu_domain *iter_domain;
	unsigned int new_iova_alignment;
	unsigned long index;
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain) {
		if (WARN_ON(iter_domain == domain)) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}

	/*
	 * The io page size drives the iova_alignment. Internally the iopt_pages
	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
	 * objects into the iommu_domain.
	 *
	 * An iommu_domain must always be able to accept PAGE_SIZE to be
	 * compatible as we can't guarantee higher contiguity.
	 */
	new_iova_alignment = max_t(unsigned long,
				   1UL << __ffs(domain->pgsize_bitmap),
				   iopt->iova_alignment);
	if (new_iova_alignment > PAGE_SIZE) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (new_iova_alignment != iopt->iova_alignment) {
		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			goto out_unlock;
	}

	/* No area exists that is outside the allowed domain aperture */
	if (geometry->aperture_start != 0) {
		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
				       domain);
		if (rc)
			goto out_reserved;
	}
	if (geometry->aperture_end != ULONG_MAX) {
		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
				       ULONG_MAX, domain);
		if (rc)
			goto out_reserved;
	}

	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
	if (rc)
		goto out_reserved;

	rc = iopt_fill_domain(iopt, domain);
	if (rc)
		goto out_release;

	iopt->iova_alignment = new_iova_alignment;
	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
	iopt->next_domain_id++;
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return 0;
out_release:
	xa_release(&iopt->domains, iopt->next_domain_id);
out_reserved:
	__iopt_remove_reserved_iova(iopt, domain);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}
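
/*
 * Worked example for the alignment computation above (illustrative only): a
 * domain whose pgsize_bitmap is 0x40201000 (4K | 2M | 1G) has __ffs() == 12,
 * so new_iova_alignment becomes 4096. On a 4K PAGE_SIZE kernel that passes
 * the PAGE_SIZE check and all later map requests must be page aligned.
 */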

static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
{
	unsigned long new_iova_alignment;
	struct iommufd_access *access;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held_write(&iopt->iova_rwsem);
	lockdep_assert_held(&iopt->domains_rwsem);

	/* See batch_iommu_map_small() */
	if (iopt->disable_large_pages)
		new_iova_alignment = PAGE_SIZE;
	else
		new_iova_alignment = 1;

	xa_for_each(&iopt->domains, index, domain)
		new_iova_alignment = max_t(unsigned long,
					   1UL << __ffs(domain->pgsize_bitmap),
					   new_iova_alignment);
	xa_for_each(&iopt->access_list, index, access)
		new_iova_alignment = max_t(unsigned long,
					   access->iova_alignment,
					   new_iova_alignment);

	if (new_iova_alignment > iopt->iova_alignment) {
		int rc;

		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
		if (rc)
			return rc;
	}
	iopt->iova_alignment = new_iova_alignment;
	return 0;
}

void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain)
{
	struct iommu_domain *iter_domain = NULL;
	unsigned long index;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);

	xa_for_each(&iopt->domains, index, iter_domain)
		if (iter_domain == domain)
			break;
	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
		goto out_unlock;

	/*
	 * Compress the xarray to keep it linear by swapping the entry to erase
	 * with the tail entry and shrinking the tail.
	 */
	iopt->next_domain_id--;
	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
	if (index != iopt->next_domain_id)
		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);

	iopt_unfill_domain(iopt, domain);
	__iopt_remove_reserved_iova(iopt, domain);

	WARN_ON(iopt_calculate_iova_alignment(iopt));
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}
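
/*
 * Worked example of the xarray compression above (illustrative only): with
 * domains [A, B, C] at indexes 0..2 and next_domain_id == 3, removing B
 * erases the tail entry C from index 2 and stores it at index 1, leaving
 * [A, C] with next_domain_id == 2 and the array still linear.
 */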

/**
 * iopt_area_split - Split an area into two parts at iova
 * @area: The area to split
 * @iova: Becomes the last of a new area
 *
 * This splits an area into two. It is part of the VFIO compatibility to allow
 * poking a hole in the mapping. The two areas continue to point at the same
 * iopt_pages, just with different starting bytes.
 */
static int iopt_area_split(struct iopt_area *area, unsigned long iova)
{
	unsigned long alignment = area->iopt->iova_alignment;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned long start_iova = iopt_area_iova(area);
	unsigned long new_start = iova + 1;
	struct io_pagetable *iopt = area->iopt;
	struct iopt_pages *pages = area->pages;
	struct iopt_area *lhs;
	struct iopt_area *rhs;
	int rc;

	lockdep_assert_held_write(&iopt->iova_rwsem);

	if (iova == start_iova || iova == last_iova)
		return 0;

	if (!pages || area->prevent_access)
		return -EBUSY;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;

	lhs = iopt_area_alloc();
	if (!lhs)
		return -ENOMEM;

	rhs = iopt_area_alloc();
	if (!rhs) {
		rc = -ENOMEM;
		goto err_free_lhs;
	}

	mutex_lock(&pages->mutex);
	/*
	 * Splitting is not permitted if an access exists; we don't track
	 * enough information to split existing accesses.
	 */
	if (area->num_accesses) {
		rc = -EINVAL;
		goto err_unlock;
	}

	/*
	 * Splitting is not permitted if a domain could have been mapped with
	 * huge pages.
	 */
	if (area->storage_domain && !iopt->disable_large_pages) {
		rc = -EINVAL;
		goto err_unlock;
	}

	interval_tree_remove(&area->node, &iopt->area_itree);
	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
			      iopt_area_start_byte(area, start_iova),
			      (new_start - 1) - start_iova + 1,
			      area->iommu_prot);
	if (WARN_ON(rc))
		goto err_insert;

	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
			      iopt_area_start_byte(area, new_start),
			      last_iova - new_start + 1, area->iommu_prot);
	if (WARN_ON(rc))
		goto err_remove_lhs;

	/*
	 * If the original area has filled a domain, domains_itree has to be
	 * updated.
	 */
	if (area->storage_domain) {
		interval_tree_remove(&area->pages_node, &pages->domains_itree);
		interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
		interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
	}

	lhs->storage_domain = area->storage_domain;
	lhs->pages = area->pages;
	rhs->storage_domain = area->storage_domain;
	rhs->pages = area->pages;
	kref_get(&rhs->pages->kref);
	kfree(area);
	mutex_unlock(&pages->mutex);

	/*
	 * No change to domains or accesses because the pages have not been
	 * changed.
	 */
	return 0;

err_remove_lhs:
	interval_tree_remove(&lhs->node, &iopt->area_itree);
err_insert:
	interval_tree_insert(&area->node, &iopt->area_itree);
err_unlock:
	mutex_unlock(&pages->mutex);
	kfree(rhs);
err_free_lhs:
	kfree(lhs);
	return rc;
}

int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas)
{
	int rc = 0;
	int i;

	down_write(&iopt->iova_rwsem);
	for (i = 0; i < num_iovas; i++) {
		struct iopt_area *area;

		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
		if (!area)
			continue;
		rc = iopt_area_split(area, iovas[i]);
		if (rc)
			break;
	}
	up_write(&iopt->iova_rwsem);
	return rc;
}
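
/*
 * Usage sketch (hypothetical caller): the VFIO compat path punches a hole in
 * a larger mapping by cutting at both edges and unmapping only the middle:
 *
 *	unsigned long iovas[] = { unmap_start - 1, unmap_last };
 *
 *	rc = iopt_cut_iova(iopt, iovas, ARRAY_SIZE(iovas));
 *	if (!rc)
 *		rc = iopt_unmap_iova(iopt, unmap_start,
 *				     unmap_last - unmap_start + 1, NULL);
 */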

void iopt_enable_large_pages(struct io_pagetable *iopt)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WRITE_ONCE(iopt->disable_large_pages, false);
	rc = iopt_calculate_iova_alignment(iopt);
	WARN_ON(rc);
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

int iopt_disable_large_pages(struct io_pagetable *iopt)
{
	int rc = 0;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	if (iopt->disable_large_pages)
		goto out_unlock;

	/* Won't do it if domains already have pages mapped in them */
	if (!xa_empty(&iopt->domains) &&
	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	WRITE_ONCE(iopt->disable_large_pages, true);
	rc = iopt_calculate_iova_alignment(iopt);
	if (rc)
		WRITE_ONCE(iopt->disable_large_pages, false);
out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
{
	int rc;

	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access,
		      xa_limit_16b, GFP_KERNEL_ACCOUNT);
	if (rc)
		goto out_unlock;

	rc = iopt_calculate_iova_alignment(iopt);
	if (rc) {
		xa_erase(&iopt->access_list, access->iopt_access_list_id);
		goto out_unlock;
	}

out_unlock:
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
	return rc;
}

void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id)
{
	down_write(&iopt->domains_rwsem);
	down_write(&iopt->iova_rwsem);
	WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access);
	WARN_ON(iopt_calculate_iova_alignment(iopt));
	up_write(&iopt->iova_rwsem);
	up_write(&iopt->domains_rwsem);
}

/* Narrow the valid_iova_itree to include reserved ranges from a device. */
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start)
{
	struct iommu_resv_region *resv;
	LIST_HEAD(resv_regions);
	unsigned int num_hw_msi = 0;
	unsigned int num_sw_msi = 0;
	int rc;

	if (iommufd_should_fail())
		return -EINVAL;

	down_write(&iopt->iova_rwsem);
	/* FIXME: drivers allocate memory but there is no failure propagated */
	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(resv, &resv_regions, list) {
		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
			num_hw_msi++;
		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
			*sw_msi_start = resv->start;
			num_sw_msi++;
		}

		rc = iopt_reserve_iova(iopt, resv->start,
				       resv->length - 1 + resv->start, dev);
		if (rc)
			goto out_reserved;
	}

	/* Drivers must offer sane combinations of regions */
	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
		rc = -EINVAL;
		goto out_reserved;
	}

	rc = 0;
	goto out_free_resv;

out_reserved:
	__iopt_remove_reserved_iova(iopt, dev);
out_free_resv:
	iommu_put_resv_regions(dev, &resv_regions);
	up_write(&iopt->iova_rwsem);
	return rc;
}