Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
cba3345c AW |
2 | /* |
3 | * VFIO core | |
4 | * | |
5 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | |
6 | * Author: Alex Williamson <alex.williamson@redhat.com> | |
7 | * | |
cba3345c AW |
8 | * Derived from original vfio: |
9 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. | |
10 | * Author: Tom Lyon, pugs@cisco.com | |
11 | */ | |
12 | ||
13 | #include <linux/cdev.h> | |
14 | #include <linux/compat.h> | |
15 | #include <linux/device.h> | |
16 | #include <linux/file.h> | |
17 | #include <linux/anon_inodes.h> | |
18 | #include <linux/fs.h> | |
19 | #include <linux/idr.h> | |
20 | #include <linux/iommu.h> | |
21 | #include <linux/list.h> | |
d1099901 | 22 | #include <linux/miscdevice.h> |
cba3345c AW |
23 | #include <linux/module.h> |
24 | #include <linux/mutex.h> | |
5f096b14 | 25 | #include <linux/pci.h> |
9587f44a | 26 | #include <linux/rwsem.h> |
cba3345c AW |
27 | #include <linux/sched.h> |
28 | #include <linux/slab.h> | |
664e9386 | 29 | #include <linux/stat.h> |
cba3345c AW |
30 | #include <linux/string.h> |
31 | #include <linux/uaccess.h> | |
32 | #include <linux/vfio.h> | |
33 | #include <linux/wait.h> | |
41be3e26 | 34 | #include <linux/sched/signal.h> |
8e5c6995 | 35 | #include <linux/pm_runtime.h> |
80c4b92a YH |
36 | #include <linux/interval_tree.h> |
37 | #include <linux/iova_bitmap.h> | |
2a3dab19 | 38 | #include <linux/iommufd.h> |
8cc02d22 | 39 | #include "vfio.h" |
cba3345c AW |
40 | |
41 | #define DRIVER_VERSION "0.3" | |
42 | #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" | |
43 | #define DRIVER_DESC "VFIO - User Level meta-driver" | |
44 | ||
/*
 * Global VFIO core state.  Singleton; holds the list of all live groups and
 * the ID allocators backing the group/device character devices.
 */
static struct vfio {
	struct class *class;		/* chardev class used for group devs */
	struct list_head group_list;	/* all live vfio_group objects */
	struct mutex group_lock;	/* locks group_list */
	struct ida group_ida;		/* minor numbers for group cdevs */
	dev_t group_devt;		/* base devt for group cdevs */
	struct class *device_class;	/* class for vfio_device->device */
	struct ida device_ida;		/* allocator for vfio_device->index */
} vfio;

/* Maps set_id -> vfio_device_set; see vfio_assign_device_set(). */
static DEFINE_XARRAY(vfio_device_set_xa);
/* Defined later in this file; needed by cdev_init() in vfio_group_alloc(). */
static const struct file_operations vfio_group_fops;
2fd585f4 JG |
57 | |
/*
 * Associate @device with the vfio_device_set keyed by @set_id, creating the
 * set on first use.  Devices sharing a set_id share one dev_set (used to
 * serialize open/close across devices that must reset together).
 *
 * Returns 0 on success, -EINVAL for a NULL set_id, -ENOMEM on allocation
 * failure.  On success device->dev_set is assigned and the device is linked
 * into the set's device_list.
 */
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	/* Not found: allocate a candidate outside the lock. */
	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	/*
	 * Re-check under the lock; another thread may have installed a set
	 * for this id while we were allocating.  __xa_cmpxchg() returns the
	 * old entry (NULL if our insert won).
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	/* Lost the race (or xarray error): discard our candidate. */
	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	/* device_count is protected by the xarray lock. */
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
107 | ||
/*
 * Undo vfio_assign_device_set(): unlink @device from its dev_set and free
 * the set when the last member is removed.  No-op if the device was never
 * assigned a set.
 */
static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	/* device_count and the xarray entry are protected by the xa lock. */
	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}
128 | ||
3b9a2d57 | 129 | /* |
cba3345c AW |
130 | * Group objects - create, release, get, put, search |
131 | */ | |
1ceabade JG |
132 | static struct vfio_group * |
133 | __vfio_group_get_from_iommu(struct iommu_group *iommu_group) | |
134 | { | |
135 | struct vfio_group *group; | |
136 | ||
3dd59a7d JG |
137 | /* |
138 | * group->iommu_group from the vfio.group_list cannot be NULL | |
139 | * under the vfio.group_lock. | |
140 | */ | |
1ceabade JG |
141 | list_for_each_entry(group, &vfio.group_list, vfio_next) { |
142 | if (group->iommu_group == iommu_group) { | |
ca5f21b2 | 143 | refcount_inc(&group->drivers); |
1ceabade JG |
144 | return group; |
145 | } | |
146 | } | |
147 | return NULL; | |
148 | } | |
149 | ||
150 | static struct vfio_group * | |
151 | vfio_group_get_from_iommu(struct iommu_group *iommu_group) | |
152 | { | |
153 | struct vfio_group *group; | |
154 | ||
155 | mutex_lock(&vfio.group_lock); | |
156 | group = __vfio_group_get_from_iommu(iommu_group); | |
157 | mutex_unlock(&vfio.group_lock); | |
158 | return group; | |
159 | } | |
160 | ||
/*
 * device release callback for group->dev; runs when the last reference on
 * the embedded struct device is put (see put_device() in
 * vfio_device_remove_group() / vfio_create_group() error path).
 */
static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	mutex_destroy(&group->group_lock);
	/* iommu_group must have been detached/NULLed before the last put. */
	WARN_ON(group->iommu_group);
	/* Return the chardev minor allocated in vfio_group_alloc(). */
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}
171 | ||
/*
 * Allocate and initialize a vfio_group for @iommu_group, including its cdev
 * identity (minor number, class, fops).  The group is not yet registered or
 * visible; vfio_create_group() completes that.  Returns the group with
 * drivers refcount set to 1, or an ERR_PTR.  Takes its own reference on
 * @iommu_group, dropped in vfio_group_release().
 */
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	/* Freed by ida_free() in vfio_group_release(). */
	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->drivers, 1);
	mutex_init(&group->group_lock);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}
207 | ||
/*
 * Create and publish a vfio_group for @iommu_group: allocate it, name it
 * ("noiommu-N" for VFIO_NO_IOMMU, otherwise just "N"), register the cdev and
 * link it on vfio.group_list.  If another thread created a group for the
 * same iommu_group first, that existing group is returned instead (with its
 * drivers refcount elevated) and ours is released.  Returns a group or an
 * ERR_PTR.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	/* Drops the embedded device ref; frees via vfio_group_release(). */
	put_device(&group->dev);
	return ret;
}
251 | ||
ca5f21b2 JG |
/*
 * Drop @device's reference on its group (the "drivers" refcount taken by the
 * group allocation functions).  The last driver to leave unpublishes the
 * group: removes it from vfio.group_list, deletes the cdev, detaches any
 * container, NULLs group->iommu_group to revoke new users, and drops the
 * final references.
 */
static void vfio_device_remove_group(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group;

	/* Fake groups created by vfio_noiommu_group_alloc() own the device
	 * membership in the iommu_group; undo it here. */
	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
		return;
	list_del(&group->vfio_next);

	/*
	 * We could concurrently probe another driver in the group that might
	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
	 * ensure that the sysfs is all cleaned up under lock otherwise the
	 * cdev_device_add() will fail due to the name already existing.
	 */
	cdev_device_del(&group->cdev, &group->dev);

	mutex_lock(&group->group_lock);
	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the device. Since
	 * all pairs must be undone these WARN_ON's indicate some caller did not
	 * properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->notifier.head);

	/*
	 * Revoke all users of group->iommu_group. At this point we know there
	 * are no devices active because we are unplugging the last one. Setting
	 * iommu_group to NULL blocks all new users.
	 */
	if (group->container)
		vfio_group_detach_container(group);
	iommu_group = group->iommu_group;
	group->iommu_group = NULL;
	mutex_unlock(&group->group_lock);
	mutex_unlock(&vfio.group_lock);

	/* Pairs with iommu_group_ref_get() in vfio_group_alloc(). */
	iommu_group_put(iommu_group);
	/* May free the group via vfio_group_release(). */
	put_device(&group->dev);
}
298 | ||
3b9a2d57 | 299 | /* |
cba3345c AW |
300 | * Device objects - create, release, get, put, search |
301 | */ | |
/* Device reference always implies a group reference */
/*
 * Drop a registration reference on @device; the final put signals
 * device->comp so vfio_unregister_group_dev() can finish tearing down.
 */
static void vfio_device_put_registration(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}
308 | ||
/*
 * Take a registration reference unless the device is already being
 * unregistered (refcount has hit zero).  Returns true on success.
 */
static bool vfio_device_try_get_registration(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}
313 | ||
/*
 * Find the vfio_device in @group backed by @dev.  On success the device is
 * returned with a registration reference held (caller must balance with
 * vfio_device_put_registration()); returns NULL if absent or unregistering.
 */
static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev &&
		    vfio_device_try_get_registration(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}
330 | ||
3b9a2d57 | 331 | /* |
cba3345c AW |
332 | * VFIO driver API |
333 | */ | |
/* Release helper called by vfio_put_device() */
/*
 * device release callback for device->device; undoes vfio_init_device()
 * (dev_set membership, index) and then delegates freeing the structure to
 * the driver's @release op.
 */
static void vfio_device_release(struct device *dev)
{
	struct vfio_device *device =
			container_of(dev, struct vfio_device, device);

	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);

	/*
	 * kvfree() cannot be done here due to a life cycle mess in
	 * vfio-ccw. Before the ccw part is fixed all drivers are
	 * required to support @release and call vfio_free_device()
	 * from there.
	 */
	device->ops->release(device);
}
cb9ff3f3 KT |
351 | |
/*
 * Allocate and initialize vfio_device so it can be registered to vfio
 * core.
 *
 * Drivers should use the wrapper vfio_alloc_device() for allocation.
 * @size is the size of the structure to be allocated, including any
 * private data used by the driver.
 *
 * Driver may provide an @init callback to cover device private data.
 *
 * Use vfio_put_device() to release the structure after success return.
 *
 * Returns the new device or an ERR_PTR (-EINVAL for an undersized @size,
 * -ENOMEM, or the error from vfio_init_device()).
 */
struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
				       const struct vfio_device_ops *ops)
{
	struct vfio_device *device;
	int ret;

	/* @size must at least cover the embedded vfio_device header. */
	if (WARN_ON(size < sizeof(struct vfio_device)))
		return ERR_PTR(-EINVAL);

	device = kvzalloc(size, GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	ret = vfio_init_device(device, dev, ops);
	if (ret)
		goto out_free;
	return device;

out_free:
	kvfree(device);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(_vfio_alloc_device);
387 | ||
/*
 * Initialize a vfio_device so it can be registered to vfio core.
 *
 * Only vfio-ccw driver should call this interface.
 *
 * Allocates device->index, initializes the completion used during
 * unregistration, runs the driver's optional @init callback, and sets up the
 * embedded struct device (released via vfio_device_release()).  Returns 0 or
 * a negative errno; on failure nothing is left allocated.
 */
int vfio_init_device(struct vfio_device *device, struct device *dev,
		     const struct vfio_device_ops *ops)
{
	int ret;

	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
	if (ret < 0) {
		dev_dbg(dev, "Error to alloc index\n");
		return ret;
	}

	device->index = ret;
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;

	if (ops->init) {
		ret = ops->init(device);
		if (ret)
			goto out_uninit;
	}

	device_initialize(&device->device);
	device->device.release = vfio_device_release;
	device->device.class = vfio.device_class;
	device->device.parent = device->dev;
	return 0;

out_uninit:
	/* Safe even if no dev_set was assigned yet. */
	vfio_release_device_set(device);
	ida_free(&vfio.device_ida, device->index);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_init_device);
427 | ||
/*
 * The helper called by driver @release callback to free the device
 * structure. Drivers which don't have private data to clean can
 * simply use this helper as its @release.
 */
void vfio_free_device(struct vfio_device *device)
{
	/* Pairs with the kvzalloc() in _vfio_alloc_device(). */
	kvfree(device);
}
EXPORT_SYMBOL_GPL(vfio_free_device);
438 | ||
c68ea0d0 CH |
439 | static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, |
440 | enum vfio_group_type type) | |
1362591f CH |
441 | { |
442 | struct iommu_group *iommu_group; | |
443 | struct vfio_group *group; | |
3af91771 CH |
444 | int ret; |
445 | ||
446 | iommu_group = iommu_group_alloc(); | |
447 | if (IS_ERR(iommu_group)) | |
448 | return ERR_CAST(iommu_group); | |
449 | ||
1c61d51e LN |
450 | ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); |
451 | if (ret) | |
452 | goto out_put_group; | |
3af91771 CH |
453 | ret = iommu_group_add_device(iommu_group, dev); |
454 | if (ret) | |
455 | goto out_put_group; | |
1362591f | 456 | |
c68ea0d0 | 457 | group = vfio_create_group(iommu_group, type); |
3af91771 CH |
458 | if (IS_ERR(group)) { |
459 | ret = PTR_ERR(group); | |
460 | goto out_remove_device; | |
461 | } | |
325a31c9 | 462 | iommu_group_put(iommu_group); |
3af91771 CH |
463 | return group; |
464 | ||
465 | out_remove_device: | |
466 | iommu_group_remove_device(dev); | |
467 | out_put_group: | |
468 | iommu_group_put(iommu_group); | |
469 | return ERR_PTR(ret); | |
470 | } | |
3af91771 CH |
471 | |
/*
 * Resolve the vfio_group for @dev: reuse its existing iommu_group's group,
 * create one if this is the first device in the iommu_group, or — when
 * vfio_noiommu is enabled and @dev has no iommu_group — fabricate a noiommu
 * group (tainting the kernel).  Returns the group with drivers refcount
 * elevated, or an ERR_PTR.
 */
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group && vfio_noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices that
		 * don't already have one, implying no IOMMU hardware/driver
		 * exists. Taint the kernel because we're about to give a DMA
		 * capable device to a user without IOMMU protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}

	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
	 * restore cache coherency. It has to be checked here because it is only
	 * valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}
514 | ||
c68ea0d0 CH |
/*
 * Common tail of device registration: validate the driver's iommufd ops,
 * give the device a (possibly singleton) dev_set, reject duplicate
 * registrations on the group, publish the struct device, and link the
 * device onto the group's device_list.  Consumes the caller's group
 * reference in all cases (moved to device->group on success, dropped via
 * vfio_device_remove_group() on failure).  Returns 0 or a negative errno.
 */
static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;
	int ret;

	/*
	 * In all cases group is the output of one of the group allocation
	 * functions and we have group->drivers incremented for us.
	 */
	if (IS_ERR(group))
		return PTR_ERR(group);

	/* bind_iommufd requires the matching unbind/attach ops. */
	if (WARN_ON(device->ops->bind_iommufd &&
		    (!device->ops->unbind_iommufd ||
		     !device->ops->attach_ioas)))
		return -EINVAL;

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		/*
		 * group->iommu_group is non-NULL because we hold the drivers
		 * refcount.
		 */
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put_registration(existing_device);
		ret = -EBUSY;
		goto err_out;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	ret = dev_set_name(&device->device, "vfio%d", device->index);
	if (ret)
		goto err_out;

	ret = device_add(&device->device);
	if (ret)
		goto err_out;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return 0;
err_out:
	/* Drops the group reference we were handed. */
	vfio_device_remove_group(device);
	return ret;
}
c68ea0d0 CH |
576 | |
/*
 * Register a device that is backed by a real IOMMU (or, with vfio_noiommu,
 * a fabricated noiommu group).  Returns 0 or a negative errno.
 */
int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
583 | ||
c68ea0d0 CH |
/*
 * Register a virtual device without IOMMU backing. The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
594 | ||
4bc94d5d AW |
/*
 * Find the device in @group matching @buf, preferring the driver's @match op
 * over a plain dev_name() comparison.  A negative return from @match aborts
 * the search with that error.  On success the device is returned with a
 * registration reference held; otherwise ERR_PTR(-ENODEV) or the @match
 * error.
 */
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			/* ret > 0 means match, < 0 is a hard error. */
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get_registration(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}
623 | ||
cba3345c AW |
/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device keep their registration
 * references, so this blocks (periodically poking the driver's @request op
 * to ask users to release the device) until the last reference is put and
 * device->comp fires.  After the wait the device is unlinked from its group
 * and unpublished; the group reference is dropped last.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put_registration(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		/* Ask the driver to nudge users; i counts the attempts. */
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			/* Already warned once; keep waiting uninterruptibly. */
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	/* Balances device_add in register path */
	device_del(&device->device);

	/* Drops the group "drivers" reference taken at registration. */
	vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
667 | ||
3b9a2d57 | 668 | /* |
cba3345c AW |
669 | * VFIO Group fd, /dev/vfio/$GROUP |
670 | */ | |
2a3dab19 JG |
/*
 * True when the group is attached to an IOMMU backend: either a legacy
 * container or an iommufd context.  Caller must hold group->group_lock.
 */
static bool vfio_group_has_iommu(struct vfio_group *group)
{
	lockdep_assert_held(&group->group_lock);
	/*
	 * There can only be users if there is a container, and if there is a
	 * container there must be users.
	 */
	WARN_ON(!group->container != !group->container_users);

	return group->container || group->iommufd;
}
682 | ||
cba3345c AW |
/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
	int ret = 0;

	mutex_lock(&group->group_lock);
	/* Nothing attached (neither container nor iommufd): nothing to do. */
	if (!vfio_group_has_iommu(group)) {
		ret = -EINVAL;
		goto out_unlock;
	}
	if (group->container) {
		/* Other open device FDs still depend on the container. */
		if (group->container_users != 1) {
			ret = -EBUSY;
			goto out_unlock;
		}
		vfio_group_detach_container(group);
	}
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}

out_unlock:
	mutex_unlock(&group->group_lock);
	return ret;
}
714 | ||
03e650f6 JG |
715 | static int vfio_group_ioctl_set_container(struct vfio_group *group, |
716 | int __user *arg) | |
717 | { | |
718 | struct vfio_container *container; | |
2a3dab19 | 719 | struct iommufd_ctx *iommufd; |
03e650f6 JG |
720 | struct fd f; |
721 | int ret; | |
722 | int fd; | |
723 | ||
724 | if (get_user(fd, arg)) | |
725 | return -EFAULT; | |
726 | ||
727 | f = fdget(fd); | |
728 | if (!f.file) | |
729 | return -EBADF; | |
730 | ||
c82e81ab | 731 | mutex_lock(&group->group_lock); |
2a3dab19 | 732 | if (vfio_group_has_iommu(group)) { |
03e650f6 JG |
733 | ret = -EINVAL; |
734 | goto out_unlock; | |
735 | } | |
3dd59a7d JG |
736 | if (!group->iommu_group) { |
737 | ret = -ENODEV; | |
738 | goto out_unlock; | |
739 | } | |
740 | ||
03e650f6 | 741 | container = vfio_container_from_file(f.file); |
03e650f6 JG |
742 | if (container) { |
743 | ret = vfio_container_attach_group(container, group); | |
744 | goto out_unlock; | |
745 | } | |
746 | ||
2a3dab19 JG |
747 | iommufd = iommufd_ctx_from_file(f.file); |
748 | if (!IS_ERR(iommufd)) { | |
749 | u32 ioas_id; | |
750 | ||
751 | ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); | |
752 | if (ret) { | |
753 | iommufd_ctx_put(group->iommufd); | |
754 | goto out_unlock; | |
755 | } | |
756 | ||
757 | group->iommufd = iommufd; | |
758 | goto out_unlock; | |
759 | } | |
760 | ||
761 | /* The FD passed is not recognized. */ | |
762 | ret = -EBADFD; | |
763 | ||
03e650f6 | 764 | out_unlock: |
c82e81ab | 765 | mutex_unlock(&group->group_lock); |
2903ff01 | 766 | fdput(f); |
cba3345c AW |
767 | return ret; |
768 | } | |
769 | ||
cba3345c AW |
770 | static const struct file_operations vfio_device_fops; |
771 | ||
/* true if the vfio_device has open_device() called but not close_device() */
bool vfio_assert_device_open(struct vfio_device *device)
{
	/* WARN (and return false) if a caller assumed an open device. */
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
294aaccb JG |
/*
 * First-open path (open_count 0 -> 1): pin the driver module, bind the
 * device to the group's IOMMU backend (container or iommufd), hand the
 * group's KVM pointer to the device, and call the driver's open_device op.
 * Caller holds device->dev_set->lock.  Returns 0 or a negative errno with
 * everything unwound.
 */
static int vfio_device_first_open(struct vfio_device *device)
{
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	/* Keep the driver module alive while the device is open. */
	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	/*
	 * Here we pass the KVM pointer with the group under the lock.  If the
	 * device driver will use it, it must obtain a reference and release it
	 * during close_device.
	 */
	mutex_lock(&device->group->group_lock);
	if (!vfio_group_has_iommu(device->group)) {
		ret = -EINVAL;
		goto err_module_put;
	}

	if (device->group->container) {
		ret = vfio_group_use_container(device->group);
		if (ret)
			goto err_module_put;
	} else if (device->group->iommufd) {
		ret = vfio_iommufd_bind(device, device->group->iommufd);
		if (ret)
			goto err_module_put;
	}

	device->kvm = device->group->kvm;
	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_container;
	}
	if (device->group->container)
		vfio_device_container_register(device);
	mutex_unlock(&device->group->group_lock);
	return 0;

err_container:
	device->kvm = NULL;
	if (device->group->container)
		vfio_group_unuse_container(device->group);
	else if (device->group->iommufd)
		vfio_iommufd_unbind(device);
err_module_put:
	mutex_unlock(&device->group->group_lock);
	module_put(device->dev->driver->owner);
	return ret;
}
830 | ||
/*
 * Last-close path (open_count 1 -> 0): mirror of vfio_device_first_open().
 * Unregisters from the container, calls the driver's close_device op, drops
 * the KVM pointer, unbinds from the IOMMU backend and releases the driver
 * module.  Caller holds device->dev_set->lock.
 */
static void vfio_device_last_close(struct vfio_device *device)
{
	lockdep_assert_held(&device->dev_set->lock);

	mutex_lock(&device->group->group_lock);
	if (device->group->container)
		vfio_device_container_unregister(device);
	if (device->ops->close_device)
		device->ops->close_device(device);
	device->kvm = NULL;
	if (device->group->container)
		vfio_group_unuse_container(device->group);
	else if (device->group->iommufd)
		vfio_iommufd_unbind(device);
	mutex_unlock(&device->group->group_lock);
	/* Pairs with try_module_get() in vfio_device_first_open(). */
	module_put(device->dev->driver->owner);
}
848 | ||
/*
 * Open @device and wrap it in an anonymous file using vfio_device_fops.
 * Bumps open_count under the dev_set lock, running the full first-open
 * sequence when it transitions 0 -> 1.  On success the caller's device
 * reference is transferred to the returned file; on failure the open is
 * fully unwound and an ERR_PTR is returned.
 */
static struct file *vfio_device_open(struct vfio_device *device)
{
	struct file *filep;
	int ret;

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_device_first_open(device);
		if (ret)
			goto err_unlock;
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	/* Only unwind first-open work if we were the one who did it. */
	if (device->open_count == 1)
		vfio_device_last_close(device);
err_unlock:
	device->open_count--;
	mutex_unlock(&device->dev_set->lock);
	return ERR_PTR(ret);
}
899 | ||
150ee2f9 JG |
900 | static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, |
901 | char __user *arg) | |
805bb6c1 JG |
902 | { |
903 | struct vfio_device *device; | |
904 | struct file *filep; | |
150ee2f9 | 905 | char *buf; |
805bb6c1 JG |
906 | int fdno; |
907 | int ret; | |
908 | ||
150ee2f9 JG |
909 | buf = strndup_user(arg, PAGE_SIZE); |
910 | if (IS_ERR(buf)) | |
911 | return PTR_ERR(buf); | |
912 | ||
805bb6c1 | 913 | device = vfio_device_get_from_name(group, buf); |
150ee2f9 | 914 | kfree(buf); |
805bb6c1 JG |
915 | if (IS_ERR(device)) |
916 | return PTR_ERR(device); | |
917 | ||
918 | fdno = get_unused_fd_flags(O_CLOEXEC); | |
919 | if (fdno < 0) { | |
920 | ret = fdno; | |
921 | goto err_put_device; | |
922 | } | |
923 | ||
924 | filep = vfio_device_open(device); | |
925 | if (IS_ERR(filep)) { | |
926 | ret = PTR_ERR(filep); | |
927 | goto err_put_fdno; | |
928 | } | |
929 | ||
930 | fd_install(fdno, filep); | |
931 | return fdno; | |
932 | ||
933 | err_put_fdno: | |
934 | put_unused_fd(fdno); | |
935 | err_put_device: | |
4a725b8d | 936 | vfio_device_put_registration(device); |
cba3345c AW |
937 | return ret; |
938 | } | |
939 | ||
99a27c08 JG |
940 | static int vfio_group_ioctl_get_status(struct vfio_group *group, |
941 | struct vfio_group_status __user *arg) | |
942 | { | |
943 | unsigned long minsz = offsetofend(struct vfio_group_status, flags); | |
944 | struct vfio_group_status status; | |
945 | ||
946 | if (copy_from_user(&status, arg, minsz)) | |
947 | return -EFAULT; | |
948 | ||
949 | if (status.argsz < minsz) | |
950 | return -EINVAL; | |
951 | ||
952 | status.flags = 0; | |
953 | ||
c82e81ab | 954 | mutex_lock(&group->group_lock); |
3dd59a7d JG |
955 | if (!group->iommu_group) { |
956 | mutex_unlock(&group->group_lock); | |
957 | return -ENODEV; | |
958 | } | |
959 | ||
2a3dab19 JG |
960 | /* |
961 | * With the container FD the iommu_group_claim_dma_owner() is done | |
962 | * during SET_CONTAINER but for IOMMFD this is done during | |
963 | * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd | |
964 | * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due | |
965 | * to viability. | |
966 | */ | |
967 | if (vfio_group_has_iommu(group)) | |
99a27c08 JG |
968 | status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | |
969 | VFIO_GROUP_FLAGS_VIABLE; | |
970 | else if (!iommu_group_dma_owner_claimed(group->iommu_group)) | |
971 | status.flags |= VFIO_GROUP_FLAGS_VIABLE; | |
c82e81ab | 972 | mutex_unlock(&group->group_lock); |
99a27c08 JG |
973 | |
974 | if (copy_to_user(arg, &status, minsz)) | |
975 | return -EFAULT; | |
976 | return 0; | |
977 | } | |
978 | ||
cba3345c AW |
979 | static long vfio_group_fops_unl_ioctl(struct file *filep, |
980 | unsigned int cmd, unsigned long arg) | |
981 | { | |
982 | struct vfio_group *group = filep->private_data; | |
150ee2f9 | 983 | void __user *uarg = (void __user *)arg; |
cba3345c AW |
984 | |
985 | switch (cmd) { | |
150ee2f9 JG |
986 | case VFIO_GROUP_GET_DEVICE_FD: |
987 | return vfio_group_ioctl_get_device_fd(group, uarg); | |
cba3345c | 988 | case VFIO_GROUP_GET_STATUS: |
99a27c08 | 989 | return vfio_group_ioctl_get_status(group, uarg); |
cba3345c | 990 | case VFIO_GROUP_SET_CONTAINER: |
67671f15 | 991 | return vfio_group_ioctl_set_container(group, uarg); |
cba3345c | 992 | case VFIO_GROUP_UNSET_CONTAINER: |
b3b43590 | 993 | return vfio_group_ioctl_unset_container(group); |
99a27c08 JG |
994 | default: |
995 | return -ENOTTY; | |
cba3345c | 996 | } |
cba3345c AW |
997 | } |
998 | ||
cba3345c AW |
999 | static int vfio_group_fops_open(struct inode *inode, struct file *filep) |
1000 | { | |
9cef7391 JG |
1001 | struct vfio_group *group = |
1002 | container_of(inode->i_cdev, struct vfio_group, cdev); | |
c6f4860e | 1003 | int ret; |
cba3345c | 1004 | |
c82e81ab | 1005 | mutex_lock(&group->group_lock); |
cba3345c | 1006 | |
912b74d2 JG |
1007 | /* |
1008 | * drivers can be zero if this races with vfio_device_remove_group(), it | |
1009 | * will be stable at 0 under the group rwsem | |
1010 | */ | |
1011 | if (refcount_read(&group->drivers) == 0) { | |
c6f4860e | 1012 | ret = -ENODEV; |
912b74d2 | 1013 | goto out_unlock; |
03a76b60 AW |
1014 | } |
1015 | ||
c6f4860e JG |
1016 | if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { |
1017 | ret = -EPERM; | |
912b74d2 | 1018 | goto out_unlock; |
6d6768c6 AW |
1019 | } |
1020 | ||
c6f4860e JG |
1021 | /* |
1022 | * Do we need multiple instances of the group open? Seems not. | |
c6f4860e | 1023 | */ |
b76c0eed | 1024 | if (group->opened_file) { |
c6f4860e | 1025 | ret = -EBUSY; |
912b74d2 | 1026 | goto out_unlock; |
cba3345c | 1027 | } |
b76c0eed | 1028 | group->opened_file = filep; |
cba3345c | 1029 | filep->private_data = group; |
912b74d2 JG |
1030 | ret = 0; |
1031 | out_unlock: | |
c82e81ab | 1032 | mutex_unlock(&group->group_lock); |
c6f4860e | 1033 | return ret; |
cba3345c AW |
1034 | } |
1035 | ||
/*
 * Release of the group file.  Detaches any container, drops the iommufd
 * context and clears opened_file so the group can be opened again.
 */
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	mutex_lock(&group->group_lock);
	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container)
		vfio_group_detach_container(group);
	if (group->iommufd) {
		iommufd_ctx_put(group->iommufd);
		group->iommufd = NULL;
	}
	group->opened_file = NULL;
	mutex_unlock(&group->group_lock);
	return 0;
}
1058 | ||
/* File operations for the VFIO group character device (/dev/vfio/$GROUP). */
static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
1066 | ||
8e5c6995 AS |
1067 | /* |
1068 | * Wrapper around pm_runtime_resume_and_get(). | |
1069 | * Return error code on failure or 0 on success. | |
1070 | */ | |
1071 | static inline int vfio_device_pm_runtime_get(struct vfio_device *device) | |
1072 | { | |
1073 | struct device *dev = device->dev; | |
1074 | ||
1075 | if (dev->driver && dev->driver->pm) { | |
1076 | int ret; | |
1077 | ||
1078 | ret = pm_runtime_resume_and_get(dev); | |
1079 | if (ret) { | |
1080 | dev_info_ratelimited(dev, | |
1081 | "vfio: runtime resume failed %d\n", ret); | |
1082 | return -EIO; | |
1083 | } | |
1084 | } | |
1085 | ||
1086 | return 0; | |
1087 | } | |
1088 | ||
1089 | /* | |
1090 | * Wrapper around pm_runtime_put(). | |
1091 | */ | |
1092 | static inline void vfio_device_pm_runtime_put(struct vfio_device *device) | |
1093 | { | |
1094 | struct device *dev = device->dev; | |
1095 | ||
1096 | if (dev->driver && dev->driver->pm) | |
1097 | pm_runtime_put(dev); | |
1098 | } | |
1099 | ||
/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	/* Run the full teardown only when this is the last open */
	if (device->open_count == 1)
		vfio_device_last_close(device);
	device->open_count--;
	mutex_unlock(&device->dev_set->lock);

	/* Drop the registration ref moved into the file by vfio_device_open() */
	vfio_device_put_registration(device);

	return 0;
}
1118 | ||
115dcec6 JG |
/*
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @cur_fsm - The current state the device is in
 * @new_fsm - The target state to reach
 * @next_fsm - Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno
 * Upon success the next step in the state progression between cur_fsm and
 * new_fsm will be set in next_fsm.
 *
 * This breaks down requests for combination transitions into smaller steps and
 * returns the next step to get to new_fsm. The function may need to be called
 * multiple times before reaching new_fsm.
 *
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means the
	 * following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	/* vfio_from_fsm_table[cur][target] == next hop towards target */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	/* Migration feature flags a device must support to use each state */
	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over. The
	 * driver will instead see an arc from the original state to the next
	 * logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
1258 | ||
1259 | /* | |
1260 | * Convert the drivers's struct file into a FD number and return it to userspace | |
1261 | */ | |
1262 | static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, | |
1263 | struct vfio_device_feature_mig_state *mig) | |
1264 | { | |
1265 | int ret; | |
1266 | int fd; | |
1267 | ||
1268 | fd = get_unused_fd_flags(O_CLOEXEC); | |
1269 | if (fd < 0) { | |
1270 | ret = fd; | |
1271 | goto out_fput; | |
1272 | } | |
1273 | ||
1274 | mig->data_fd = fd; | |
1275 | if (copy_to_user(arg, mig, sizeof(*mig))) { | |
1276 | ret = -EFAULT; | |
1277 | goto out_put_unused; | |
1278 | } | |
1279 | fd_install(fd, filp); | |
1280 | return 0; | |
1281 | ||
1282 | out_put_unused: | |
1283 | put_unused_fd(fd); | |
1284 | out_fput: | |
1285 | fput(filp); | |
1286 | return ret; | |
1287 | } | |
1288 | ||
/*
 * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: GET reports the current migration
 * state; SET asks the driver to transition and may return a data transfer
 * FD via vfio_ioct_mig_return_fd().
 */
static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	/* NULL filp means the transition succeeded with no data FD */
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	/* No FD to hand back on this path */
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}
1338 | ||
1339 | static int vfio_ioctl_device_feature_migration(struct vfio_device *device, | |
1340 | u32 flags, void __user *arg, | |
1341 | size_t argsz) | |
1342 | { | |
1343 | struct vfio_device_feature_migration mig = { | |
8cb3d83b | 1344 | .flags = device->migration_flags, |
115dcec6 JG |
1345 | }; |
1346 | int ret; | |
1347 | ||
6e97eba8 | 1348 | if (!device->mig_ops) |
115dcec6 JG |
1349 | return -ENOTTY; |
1350 | ||
1351 | ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, | |
1352 | sizeof(mig)); | |
1353 | if (ret != 1) | |
1354 | return ret; | |
1355 | if (copy_to_user(arg, &mig, sizeof(mig))) | |
1356 | return -EFAULT; | |
1357 | return 0; | |
1358 | } | |
1359 | ||
80c4b92a YH |
/*
 * Ranges should fit into a single kernel page: upper bound on the number
 * of user-supplied DMA logging ranges accepted at once.
 */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
1363 | ||
/*
 * VFIO_DEVICE_FEATURE_DMA_LOGGING_START: validate the user's IOVA ranges,
 * build a non-overlapping interval tree from them, and hand it to the
 * driver's log_start().  The (possibly adjusted) control struct is copied
 * back to userspace on success.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	/* Bounded so all nodes fit within a single page allocation */
	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/* Each range must be aligned to the requested page size */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* Undo log_start() if we cannot report the result back to the user */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}
1448 | ||
1449 | static int | |
1450 | vfio_ioctl_device_feature_logging_stop(struct vfio_device *device, | |
1451 | u32 flags, void __user *arg, | |
1452 | size_t argsz) | |
1453 | { | |
1454 | int ret; | |
1455 | ||
1456 | if (!device->log_ops) | |
1457 | return -ENOTTY; | |
1458 | ||
1459 | ret = vfio_check_feature(flags, argsz, | |
1460 | VFIO_DEVICE_FEATURE_SET, 0); | |
1461 | if (ret != 1) | |
1462 | return ret; | |
1463 | ||
1464 | return device->log_ops->log_stop(device); | |
1465 | } | |
1466 | ||
1467 | static int vfio_device_log_read_and_clear(struct iova_bitmap *iter, | |
1468 | unsigned long iova, size_t length, | |
1469 | void *opaque) | |
1470 | { | |
1471 | struct vfio_device *device = opaque; | |
1472 | ||
1473 | return device->log_ops->log_read_and_clear(device, iova, length, iter); | |
1474 | } | |
1475 | ||
1476 | static int | |
1477 | vfio_ioctl_device_feature_logging_report(struct vfio_device *device, | |
1478 | u32 flags, void __user *arg, | |
1479 | size_t argsz) | |
1480 | { | |
1481 | size_t minsz = | |
1482 | offsetofend(struct vfio_device_feature_dma_logging_report, | |
1483 | bitmap); | |
1484 | struct vfio_device_feature_dma_logging_report report; | |
1485 | struct iova_bitmap *iter; | |
1486 | u64 iova_end; | |
1487 | int ret; | |
1488 | ||
1489 | if (!device->log_ops) | |
1490 | return -ENOTTY; | |
1491 | ||
1492 | ret = vfio_check_feature(flags, argsz, | |
1493 | VFIO_DEVICE_FEATURE_GET, | |
1494 | sizeof(report)); | |
1495 | if (ret != 1) | |
1496 | return ret; | |
1497 | ||
1498 | if (copy_from_user(&report, arg, minsz)) | |
1499 | return -EFAULT; | |
1500 | ||
1501 | if (report.page_size < SZ_4K || !is_power_of_2(report.page_size)) | |
1502 | return -EINVAL; | |
1503 | ||
1504 | if (check_add_overflow(report.iova, report.length, &iova_end) || | |
1505 | iova_end > ULONG_MAX) | |
1506 | return -EOVERFLOW; | |
1507 | ||
1508 | iter = iova_bitmap_alloc(report.iova, report.length, | |
1509 | report.page_size, | |
1510 | u64_to_user_ptr(report.bitmap)); | |
1511 | if (IS_ERR(iter)) | |
1512 | return PTR_ERR(iter); | |
1513 | ||
1514 | ret = iova_bitmap_for_each(iter, device, | |
1515 | vfio_device_log_read_and_clear); | |
1516 | ||
1517 | iova_bitmap_free(iter); | |
1518 | return ret; | |
1519 | } | |
1520 | ||
445ad495 JG |
/*
 * VFIO_DEVICE_FEATURE entry point: validate the common header (argsz and
 * flag combinations) and dispatch to the per-feature handler, falling back
 * to the driver's device_feature op for unknown features.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
1574 | ||
cba3345c AW |
1575 | static long vfio_device_fops_unl_ioctl(struct file *filep, |
1576 | unsigned int cmd, unsigned long arg) | |
1577 | { | |
1578 | struct vfio_device *device = filep->private_data; | |
8e5c6995 AS |
1579 | int ret; |
1580 | ||
1581 | ret = vfio_device_pm_runtime_get(device); | |
1582 | if (ret) | |
1583 | return ret; | |
cba3345c | 1584 | |
445ad495 JG |
1585 | switch (cmd) { |
1586 | case VFIO_DEVICE_FEATURE: | |
8e5c6995 AS |
1587 | ret = vfio_ioctl_device_feature(device, (void __user *)arg); |
1588 | break; | |
1589 | ||
445ad495 JG |
1590 | default: |
1591 | if (unlikely(!device->ops->ioctl)) | |
8e5c6995 AS |
1592 | ret = -EINVAL; |
1593 | else | |
1594 | ret = device->ops->ioctl(device, cmd, arg); | |
1595 | break; | |
445ad495 | 1596 | } |
8e5c6995 AS |
1597 | |
1598 | vfio_device_pm_runtime_put(device); | |
1599 | return ret; | |
cba3345c AW |
1600 | } |
1601 | ||
1602 | static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, | |
1603 | size_t count, loff_t *ppos) | |
1604 | { | |
1605 | struct vfio_device *device = filep->private_data; | |
1606 | ||
1607 | if (unlikely(!device->ops->read)) | |
1608 | return -EINVAL; | |
1609 | ||
6df62c5b | 1610 | return device->ops->read(device, buf, count, ppos); |
cba3345c AW |
1611 | } |
1612 | ||
1613 | static ssize_t vfio_device_fops_write(struct file *filep, | |
1614 | const char __user *buf, | |
1615 | size_t count, loff_t *ppos) | |
1616 | { | |
1617 | struct vfio_device *device = filep->private_data; | |
1618 | ||
1619 | if (unlikely(!device->ops->write)) | |
1620 | return -EINVAL; | |
1621 | ||
6df62c5b | 1622 | return device->ops->write(device, buf, count, ppos); |
cba3345c AW |
1623 | } |
1624 | ||
1625 | static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) | |
1626 | { | |
1627 | struct vfio_device *device = filep->private_data; | |
1628 | ||
1629 | if (unlikely(!device->ops->mmap)) | |
1630 | return -EINVAL; | |
1631 | ||
6df62c5b | 1632 | return device->ops->mmap(device, vma); |
cba3345c AW |
1633 | } |
1634 | ||
cba3345c AW |
/* File operations for the anonymous VFIO device FD created in vfio_device_open(). */
static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};
1644 | ||
50d63b5b JG |
1645 | /** |
1646 | * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file | |
1647 | * @file: VFIO group file | |
6cdd9782 | 1648 | * |
819da99a JG |
1649 | * The returned iommu_group is valid as long as a ref is held on the file. This |
1650 | * returns a reference on the group. This function is deprecated, only the SPAPR | |
1651 | * path in kvm should call it. | |
6cdd9782 | 1652 | */ |
50d63b5b | 1653 | struct iommu_group *vfio_file_iommu_group(struct file *file) |
6cdd9782 | 1654 | { |
50d63b5b | 1655 | struct vfio_group *group = file->private_data; |
3dd59a7d | 1656 | struct iommu_group *iommu_group = NULL; |
6cdd9782 | 1657 | |
4b22ef04 JG |
1658 | if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) |
1659 | return NULL; | |
1660 | ||
1661 | if (!vfio_file_is_group(file)) | |
50d63b5b | 1662 | return NULL; |
3dd59a7d JG |
1663 | |
1664 | mutex_lock(&group->group_lock); | |
1665 | if (group->iommu_group) { | |
1666 | iommu_group = group->iommu_group; | |
1667 | iommu_group_ref_get(iommu_group); | |
1668 | } | |
1669 | mutex_unlock(&group->group_lock); | |
1670 | return iommu_group; | |
6cdd9782 | 1671 | } |
50d63b5b | 1672 | EXPORT_SYMBOL_GPL(vfio_file_iommu_group); |
6cdd9782 | 1673 | |
4b22ef04 JG |
/**
 * vfio_file_is_group - True if the file is usable with VFIO APIs
 * @file: VFIO group file
 *
 * Identified by comparing against the group fops table.
 */
bool vfio_file_is_group(struct file *file)
{
	return file->f_op == &vfio_group_fops;
}
EXPORT_SYMBOL_GPL(vfio_file_is_group);
1683 | ||
a905ad04 JG |
1684 | /** |
1685 | * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file | |
1686 | * is always CPU cache coherent | |
1687 | * @file: VFIO group file | |
c0560f51 | 1688 | * |
a905ad04 JG |
1689 | * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop |
1690 | * bit in DMA transactions. A return of false indicates that the user has | |
1691 | * rights to access additional instructions such as wbinvd on x86. | |
c0560f51 | 1692 | */ |
a905ad04 | 1693 | bool vfio_file_enforced_coherent(struct file *file) |
c0560f51 | 1694 | { |
a905ad04 | 1695 | struct vfio_group *group = file->private_data; |
0d8227b6 JG |
1696 | struct vfio_device *device; |
1697 | bool ret = true; | |
c0560f51 | 1698 | |
b1b8132a | 1699 | if (!vfio_file_is_group(file)) |
a905ad04 | 1700 | return true; |
c0560f51 | 1701 | |
0d8227b6 JG |
1702 | /* |
1703 | * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then | |
1704 | * any domain later attached to it will also not support it. If the cap | |
1705 | * is set then the iommu_domain eventually attached to the device/group | |
1706 | * must use a domain with enforce_cache_coherency(). | |
1707 | */ | |
1708 | mutex_lock(&group->device_lock); | |
1709 | list_for_each_entry(device, &group->device_list, group_next) { | |
1710 | if (!device_iommu_capable(device->dev, | |
1711 | IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { | |
1712 | ret = false; | |
1713 | break; | |
1714 | } | |
c0560f51 | 1715 | } |
0d8227b6 | 1716 | mutex_unlock(&group->device_lock); |
a905ad04 | 1717 | return ret; |
c0560f51 | 1718 | } |
a905ad04 | 1719 | EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); |
c0560f51 | 1720 | |
ba70a89f JG |
1721 | /** |
1722 | * vfio_file_set_kvm - Link a kvm with VFIO drivers | |
1723 | * @file: VFIO group file | |
1724 | * @kvm: KVM to link | |
1725 | * | |
421cfe65 MR |
1726 | * When a VFIO device is first opened the KVM will be available in |
1727 | * device->kvm if one was associated with the group. | |
ba70a89f JG |
1728 | */ |
1729 | void vfio_file_set_kvm(struct file *file, struct kvm *kvm) | |
6cdd9782 | 1730 | { |
ba70a89f | 1731 | struct vfio_group *group = file->private_data; |
6cdd9782 | 1732 | |
b1b8132a | 1733 | if (!vfio_file_is_group(file)) |
ba70a89f | 1734 | return; |
5d6dee80 | 1735 | |
c82e81ab | 1736 | mutex_lock(&group->group_lock); |
ba70a89f | 1737 | group->kvm = kvm; |
c82e81ab | 1738 | mutex_unlock(&group->group_lock); |
5d6dee80 | 1739 | } |
ba70a89f | 1740 | EXPORT_SYMBOL_GPL(vfio_file_set_kvm); |
5d6dee80 | 1741 | |
6a985ae8 JG |
1742 | /** |
1743 | * vfio_file_has_dev - True if the VFIO file is a handle for device | |
1744 | * @file: VFIO file to check | |
1745 | * @device: Device that must be part of the file | |
1746 | * | |
1747 | * Returns true if given file has permission to manipulate the given device. | |
1748 | */ | |
1749 | bool vfio_file_has_dev(struct file *file, struct vfio_device *device) | |
6cdd9782 | 1750 | { |
6a985ae8 | 1751 | struct vfio_group *group = file->private_data; |
6cdd9782 | 1752 | |
b1b8132a | 1753 | if (!vfio_file_is_group(file)) |
6a985ae8 JG |
1754 | return false; |
1755 | ||
1756 | return group == device->group; | |
88d7ab89 | 1757 | } |
6a985ae8 | 1758 | EXPORT_SYMBOL_GPL(vfio_file_has_dev); |
88d7ab89 | 1759 | |
3b9a2d57 | 1760 | /* |
d7a8d5ed AW |
1761 | * Sub-module support |
1762 | */ | |
1763 | /* | |
1764 | * Helper for managing a buffer of info chain capabilities, allocate or | |
1765 | * reallocate a buffer with additional @size, filling in @id and @version | |
1766 | * of the capability. A pointer to the new capability is returned. | |
1767 | * | |
1768 | * NB. The chain is based at the head of the buffer, so new entries are | |
1769 | * added to the tail, vfio_info_cap_shift() should be called to fixup the | |
1770 | * next offsets prior to copying to the user buffer. | |
1771 | */ | |
1772 | struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, | |
1773 | size_t size, u16 id, u16 version) | |
1774 | { | |
1775 | void *buf; | |
1776 | struct vfio_info_cap_header *header, *tmp; | |
1777 | ||
1778 | buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); | |
1779 | if (!buf) { | |
1780 | kfree(caps->buf); | |
6641085e | 1781 | caps->buf = NULL; |
d7a8d5ed AW |
1782 | caps->size = 0; |
1783 | return ERR_PTR(-ENOMEM); | |
1784 | } | |
1785 | ||
1786 | caps->buf = buf; | |
1787 | header = buf + caps->size; | |
1788 | ||
1789 | /* Eventually copied to user buffer, zero */ | |
1790 | memset(header, 0, size); | |
1791 | ||
1792 | header->id = id; | |
1793 | header->version = version; | |
1794 | ||
1795 | /* Add to the end of the capability chain */ | |
5ba6de98 | 1796 | for (tmp = buf; tmp->next; tmp = buf + tmp->next) |
d7a8d5ed AW |
1797 | ; /* nothing */ |
1798 | ||
1799 | tmp->next = caps->size; | |
1800 | caps->size += size; | |
1801 | ||
1802 | return header; | |
1803 | } | |
1804 | EXPORT_SYMBOL_GPL(vfio_info_cap_add); | |
1805 | ||
1806 | void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) | |
1807 | { | |
1808 | struct vfio_info_cap_header *tmp; | |
5ba6de98 | 1809 | void *buf = (void *)caps->buf; |
d7a8d5ed | 1810 | |
5ba6de98 | 1811 | for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) |
d7a8d5ed AW |
1812 | tmp->next += offset; |
1813 | } | |
b3c0a866 | 1814 | EXPORT_SYMBOL(vfio_info_cap_shift); |
d7a8d5ed | 1815 | |
dda01f78 AW |
1816 | int vfio_info_add_capability(struct vfio_info_cap *caps, |
1817 | struct vfio_info_cap_header *cap, size_t size) | |
b3c0a866 KW |
1818 | { |
1819 | struct vfio_info_cap_header *header; | |
b3c0a866 | 1820 | |
dda01f78 | 1821 | header = vfio_info_cap_add(caps, size, cap->id, cap->version); |
b3c0a866 KW |
1822 | if (IS_ERR(header)) |
1823 | return PTR_ERR(header); | |
1824 | ||
dda01f78 | 1825 | memcpy(header + 1, cap + 1, size - sizeof(*header)); |
b3c0a866 | 1826 | |
b3c0a866 KW |
1827 | return 0; |
1828 | } | |
b3c0a866 | 1829 | EXPORT_SYMBOL(vfio_info_add_capability); |
2169037d | 1830 | |
c747f08a KW |
1831 | int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, |
1832 | int max_irq_type, size_t *data_size) | |
1833 | { | |
1834 | unsigned long minsz; | |
1835 | size_t size; | |
1836 | ||
1837 | minsz = offsetofend(struct vfio_irq_set, count); | |
1838 | ||
1839 | if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || | |
1840 | (hdr->count >= (U32_MAX - hdr->start)) || | |
1841 | (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | | |
1842 | VFIO_IRQ_SET_ACTION_TYPE_MASK))) | |
1843 | return -EINVAL; | |
1844 | ||
1845 | if (data_size) | |
1846 | *data_size = 0; | |
1847 | ||
1848 | if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) | |
1849 | return -EINVAL; | |
1850 | ||
1851 | switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { | |
1852 | case VFIO_IRQ_SET_DATA_NONE: | |
1853 | size = 0; | |
1854 | break; | |
1855 | case VFIO_IRQ_SET_DATA_BOOL: | |
1856 | size = sizeof(uint8_t); | |
1857 | break; | |
1858 | case VFIO_IRQ_SET_DATA_EVENTFD: | |
1859 | size = sizeof(int32_t); | |
1860 | break; | |
1861 | default: | |
1862 | return -EINVAL; | |
1863 | } | |
1864 | ||
1865 | if (size) { | |
1866 | if (hdr->argsz - minsz < hdr->count * size) | |
1867 | return -EINVAL; | |
1868 | ||
1869 | if (!data_size) | |
1870 | return -EINVAL; | |
1871 | ||
1872 | *data_size = hdr->count * size; | |
1873 | } | |
1874 | ||
1875 | return 0; | |
1876 | } | |
1877 | EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); | |
1878 | ||
/*
 * Module/class support
 */
/* devnode callback: place all vfio class devices under /dev/vfio/. */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
1886 | ||
/*
 * Module init: set up the IDAs/lists, the container subsystem, the two
 * device classes and the group chardev region. Error paths unwind in
 * strict reverse order of setup via the goto chain.
 */
static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	ida_init(&vfio.device_ida);
	mutex_init(&vfio.group_lock);
	INIT_LIST_HEAD(&vfio.group_list);

	ret = vfio_container_init();
	if (ret)
		return ret;

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_group_class;
	}

	vfio.class->devnode = vfio_devnode;

	/* /sys/class/vfio-dev/vfioX */
	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
	if (IS_ERR(vfio.device_class)) {
		ret = PTR_ERR(vfio.device_class);
		goto err_dev_class;
	}

	/* Reserve the whole minor range for group chardevs. */
	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_alloc_chrdev:
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
err_dev_class:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_group_class:
	vfio_container_cleanup();
	return ret;
}
1933 | ||
/*
 * Module exit: tear down in reverse order of vfio_init(). All groups
 * must already be gone (WARN otherwise).
 */
static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

	ida_destroy(&vfio.device_ida);
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.device_class);
	vfio.device_class = NULL;
	class_destroy(vfio.class);
	vfio_container_cleanup();
	/*
	 * NOTE(review): vfio.class is NULLed only after container cleanup,
	 * unlike device_class above — presumably harmless at module exit,
	 * but the asymmetry looks accidental; confirm ordering intent.
	 */
	vfio.class = NULL;
	xa_destroy(&vfio_device_set_xa);
}
1948 | ||
module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
/* Bind the legacy /dev/vfio/vfio misc device name to this module. */
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
/* Pull in the IOMMU backends after this module loads, if available. */
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");