// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>

#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");

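/*
 * Usage note (illustrative, not from the original source): because the
 * parameter is declared with S_IWUSR it can also be flipped at runtime by
 * root via sysfs, in addition to being set at load time:
 *
 *	echo 1 > /sys/module/iommufd/parameters/allow_unsafe_interrupts
 */
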
void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommu_group_put(idev->group);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes ownership over the device and returns a
 * struct iommufd_device pointer, otherwise it returns an error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind().
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommu_group *group;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for
	 * userspace to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* group refcount moves into iommufd_device */
	idev->group = group;

	/*
	 * If the caller fails after this success it must call
	 * iommufd_device_unbind() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other
	 * object can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommu_group_put(group);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. DMA ownership returns to unowned,
 * with DMA controlled by the DMA API. This invalidates the iommufd_device
 * pointer; other APIs that consume it must not be called concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);

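/*
 * Usage sketch (illustrative, not from the original source): a minimal
 * driver-side bind/unbind flow. "my_state", "my_probe_bind" and
 * "my_remove_unbind" are hypothetical; the ictx would typically come from
 * iommufd_ctx_from_file() on the iommufd the user passed in, and the driver
 * must set driver_managed_dma in its driver structure.
 *
 *	struct my_state {
 *		struct iommufd_device *idev;
 *		u32 dev_id;
 *	};
 *
 *	static int my_probe_bind(struct my_state *st, struct iommufd_ctx *ictx,
 *				 struct device *dev)
 *	{
 *		st->idev = iommufd_device_bind(ictx, dev, &st->dev_id);
 *		if (IS_ERR(st->idev))
 *			return PTR_ERR(st->idev);
 *		return 0;
 *	}
 *
 *	static void my_remove_unbind(struct my_state *st)
 *	{
 *		iommufd_device_unbind(st->idev);
 *	}
 */
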
static int iommufd_device_setup_msi(struct iommufd_device *idev,
				    struct iommufd_hw_pagetable *hwpt,
				    phys_addr_t sw_msi_start)
{
	int rc;

	/*
	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us
	 * to call iommu_get_msi_cookie() on its behalf. This is necessary to
	 * set up the MSI window so iommu_dma_prepare_msi() can install pages
	 * into our domain after request_irq(). If this is not done, interrupts
	 * will not work on this domain.
	 *
	 * FIXME: This is conceptually broken for iommufd since we want to
	 * allow userspace to change the domains, eg switch from an identity
	 * IOAS to a DMA IOAS. There is currently no way to create an MSI
	 * window that matches what the IRQ layer actually expects in a newly
	 * created domain.
	 */
	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
		if (rc)
			return rc;

		/*
		 * iommu_get_msi_cookie() can only be called once per domain,
		 * it returns -EBUSY on later calls.
		 */
		hwpt->msi_cookie = true;
	}

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(idev->dev) &&
	    !iommu_group_has_isolated_msi(idev->group)) {
		if (!allow_unsafe_interrupts)
			return -EPERM;

		dev_warn(
			idev->dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}
	return 0;
}

static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
					   struct iommu_group *group)
{
	struct iommufd_device *cur_dev;

	lockdep_assert_held(&hwpt->devices_lock);

	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
		if (cur_dev->group == group)
			return true;
	return false;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev)
{
	phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
	int rc;

	lockdep_assert_held(&hwpt->devices_lock);

	if (WARN_ON(idev->hwpt))
		return -EINVAL;

	/*
	 * Try to upgrade the domain we have; it is an iommu driver bug to
	 * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but then fail
	 * enforce_cache_coherency when there are no devices attached to the
	 * domain.
	 */
	if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) {
		if (hwpt->domain->ops->enforce_cache_coherency)
			hwpt->enforce_cache_coherency =
				hwpt->domain->ops->enforce_cache_coherency(
					hwpt->domain);
		if (!hwpt->enforce_cache_coherency) {
			WARN_ON(list_empty(&hwpt->devices));
			return -EINVAL;
		}
	}

	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
						   idev->group, &sw_msi_start);
	if (rc)
		return rc;

	rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
	if (rc)
		goto err_unresv;

	/*
	 * FIXME: Hack around missing a device-centric iommu api, only attach
	 * to the group once for the first device that is in the group.
	 */
	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
		rc = iommu_attach_group(hwpt->domain, idev->group);
		if (rc)
			goto err_unresv;
	}
	return 0;
err_unresv:
	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
	return rc;
}

void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
				 struct iommufd_device *idev)
{
	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group))
		iommu_detach_group(hwpt->domain, idev->group);
	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
}

static int iommufd_device_do_attach(struct iommufd_device *idev,
				    struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	mutex_lock(&hwpt->devices_lock);
	rc = iommufd_hw_pagetable_attach(hwpt, idev);
	if (rc)
		goto out_unlock;

	idev->hwpt = hwpt;
	refcount_inc(&hwpt->obj.users);
	list_add(&idev->devices_item, &hwpt->devices);
out_unlock:
	mutex_unlock(&hwpt->devices_lock);
	return rc;
}

/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and, if one is found, use it; otherwise we create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
					  struct iommufd_ioas *ioas)
{
	struct iommufd_hw_pagetable *hwpt;
	int rc;

	/*
	 * There is no differentiation when domains are allocated, so any
	 * domain that is willing to attach to the device is interchangeable
	 * with any other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt->auto_domain)
			continue;

		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		rc = iommufd_device_do_attach(idev, hwpt);
		iommufd_put_object(&hwpt->obj);

		/*
		 * -EINVAL means the domain is incompatible with the device.
		 * Other error codes should propagate to userspace as failure.
		 * Success means the domain is attached.
		 */
		if (rc == -EINVAL)
			continue;
		goto out_unlock;
	}

	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true);
	if (IS_ERR(hwpt)) {
		rc = PTR_ERR(hwpt);
		goto out_unlock;
	}
	hwpt->auto_domain = true;

	mutex_unlock(&ioas->mutex);
	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	return 0;
out_unlock:
	mutex_unlock(&ioas->mutex);
	return rc;
}

/**
 * iommufd_device_attach - Connect a device to an iommu_domain
 * @idev: device to attach
 * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
 *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
 *
 * This connects the device to an iommu_domain, either automatically or
 * manually selected. Once this completes the device can do DMA.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
	struct iommufd_object *pt_obj;
	int rc;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HW_PAGETABLE: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		rc = iommufd_device_do_attach(idev, hwpt);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		rc = iommufd_device_auto_get_domain(idev, ioas);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	default:
		rc = -EINVAL;
		goto out_put_pt_obj;
	}

	refcount_inc(&idev->obj.users);
	*pt_id = idev->hwpt->obj.id;
	rc = 0;

out_put_pt_obj:
	iommufd_put_object(pt_obj);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);

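/*
 * Usage sketch (illustrative, not from the original source): attaching a
 * bound device to the IOAS the user supplied, then detaching at teardown.
 * "ioas_id" is the hypothetical user-provided object ID; on success pt_id is
 * overwritten with the hw_pagetable ID that should be reported back to
 * userspace.
 *
 *	u32 pt_id = ioas_id;
 *	int rc;
 *
 *	rc = iommufd_device_attach(idev, &pt_id);
 *	if (rc)
 *		return rc;
 *	pt_id now holds the IOMMUFD_OBJ_HW_PAGETABLE ID and DMA can start.
 *	...
 *	iommufd_device_detach(idev);
 */
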
/**
 * iommufd_device_detach - Disconnect a device from an iommu_domain
 * @idev: device to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns to blocked DMA translation.
 */
void iommufd_device_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt = idev->hwpt;

	mutex_lock(&hwpt->devices_lock);
	list_del(&idev->devices_item);
	idev->hwpt = NULL;
	iommufd_hw_pagetable_detach(hwpt, idev);
	mutex_unlock(&hwpt->devices_lock);

	if (hwpt->auto_domain)
		iommufd_object_deref_user(idev->ictx, &hwpt->obj);
	else
		refcount_dec(&hwpt->obj.users);

	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	if (access->ioas) {
		iopt_remove_access(&access->ioas->iopt, access);
		refcount_dec(&access->ioas->obj.users);
		access->ioas = NULL;
	}
	iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	struct iommufd_ioas *new_ioas;
	int rc = 0;

	if (access->ioas)
		return -EINVAL;

	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
	if (IS_ERR(new_ioas))
		return PTR_ERR(new_ioas);

	rc = iopt_add_access(&new_ioas->iopt, access);
	if (rc) {
		iommufd_put_object(&new_ioas->obj);
		return rc;
	}
	iommufd_ref_to_users(&new_ioas->obj);

	access->ioas = new_ioas;
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);

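/*
 * Usage sketch (illustrative, not from the original source): an emulated
 * device creating an access and attaching it to a user-supplied IOAS.
 * "my_ops", "my_unmap", "my_data", "ioas_id" and "access_id" are
 * hypothetical driver definitions.
 *
 *	static void my_unmap(void *data, unsigned long iova,
 *			     unsigned long length)
 *	{
 *		stop using and unpin anything pinned in [iova, iova + length)
 *	}
 *
 *	static const struct iommufd_access_ops my_ops = {
 *		.needs_pin_pages = 1,
 *		.unmap = my_unmap,
 *	};
 *
 *	access = iommufd_access_create(ictx, &my_ops, my_data, &access_id);
 *	if (IS_ERR(access))
 *		return PTR_ERR(access);
 *	rc = iommufd_access_attach(access, ioas_id);
 *	if (rc) {
 *		iommufd_access_destroy(access);
 *		return rc;
 *	}
 *	...
 *	iommufd_access_destroy(access);
 */
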
/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has attached
 * a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(&access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}

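/*
 * Contract sketch (illustrative, not from the original source): a driver's
 * unmap op must drop every pin overlapping the notified range before
 * returning, using the exact iova/length of each original pin. The
 * "my_access" structure, its pinned-range list and its lock are hypothetical.
 *
 *	static void my_unmap(void *data, unsigned long iova,
 *			     unsigned long length)
 *	{
 *		struct my_access *ma = data;
 *		struct my_pin *pin, *next;
 *
 *		mutex_lock(&ma->lock);
 *		list_for_each_entry_safe(pin, next, &ma->pins, item) {
 *			if (pin->iova + pin->length <= iova ||
 *			    pin->iova >= iova + length)
 *				continue;
 *			iommufd_access_unpin_pages(ma->access, pin->iova,
 *						   pin->length);
 *			list_del(&pin->item);
 *			kfree(pin);
 *		}
 *		mutex_unlock(&ma->lock);
 *	}
 */
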
/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Return (unpin) the struct page *'s. The caller must stop accessing the pages
 * before calling this. The iova/length must exactly match the ones provided
 * to iommufd_access_pin_pages().
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	struct io_pagetable *iopt = &access->ioas->iopt;
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))));
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct io_pagetable *iopt = &access->ioas->iopt;
	struct iopt_area_contig_iter iter;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);

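/*
 * Usage sketch (illustrative, not from the original source): pinning one
 * page-aligned range, touching the first page from the CPU, then unpinning
 * with the exact same iova/length. "iova" and "length" are hypothetical
 * caller-provided values; length is assumed to be a PAGE_SIZE multiple.
 *
 *	struct page **pages;
 *	size_t npages = length / PAGE_SIZE;
 *	void *va;
 *	int rc;
 *
 *	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 *	if (!pages)
 *		return -ENOMEM;
 *	rc = iommufd_access_pin_pages(access, iova, length, pages,
 *				      IOMMUFD_ACCESS_RW_WRITE);
 *	if (rc)
 *		goto out_free;
 *	va = kmap_local_page(pages[0]);
 *	memset(va, 0, PAGE_SIZE);
 *	kunmap_local(va);
 *	iommufd_access_unpin_pages(access, iova, length);
 * out_free:
 *	kfree(pages);
 */
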
/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel memory to/from the IOVA range given by @iova/@length. If flags
 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
 * by changing it into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct io_pagetable *iopt = &access->ioas->iopt;
	struct iopt_area_contig_iter iter;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
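
/*
 * Usage sketch (illustrative, not from the original source): copying a small
 * descriptor out of the IOAS and writing a status word back, without pinning.
 * "struct my_desc", "desc_iova", "status" and "status_iova" are hypothetical.
 *
 *	struct my_desc desc;
 *	u32 status = 0;
 *	int rc;
 *
 *	rc = iommufd_access_rw(access, desc_iova, &desc, sizeof(desc),
 *			       IOMMUFD_ACCESS_RW_READ);
 *	if (rc)
 *		return rc;
 *	...
 *	rc = iommufd_access_rw(access, status_iova, &status, sizeof(status),
 *			       IOMMUFD_ACCESS_RW_WRITE);
 */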