Commit | Line | Data |
---|---|---|
bfd99ff5 AK |
1 | /* |
2 | * Kernel-based Virtual Machine - device assignment support | |
3 | * | |
221d059d | 4 | * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. |
bfd99ff5 AK |
5 | * |
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
7 | * the COPYING file in the top-level directory. | |
8 | * | |
9 | */ | |
10 | ||
11 | #include <linux/kvm_host.h> | |
12 | #include <linux/kvm.h> | |
13 | #include <linux/uaccess.h> | |
14 | #include <linux/vmalloc.h> | |
15 | #include <linux/errno.h> | |
16 | #include <linux/spinlock.h> | |
17 | #include <linux/pci.h> | |
18 | #include <linux/interrupt.h> | |
5a0e3ad6 | 19 | #include <linux/slab.h> |
3d27e23b AW |
20 | #include <linux/namei.h> |
21 | #include <linux/fs.h> | |
bfd99ff5 | 22 | #include "irq.h" |
c9eab58f PB |
23 | #include "assigned-dev.h" |
24 | ||
25 | struct kvm_assigned_dev_kernel { | |
26 | struct kvm_irq_ack_notifier ack_notifier; | |
27 | struct list_head list; | |
28 | int assigned_dev_id; | |
29 | int host_segnr; | |
30 | int host_busnr; | |
31 | int host_devfn; | |
32 | unsigned int entries_nr; | |
33 | int host_irq; | |
34 | bool host_irq_disabled; | |
35 | bool pci_2_3; | |
36 | struct msix_entry *host_msix_entries; | |
37 | int guest_irq; | |
38 | struct msix_entry *guest_msix_entries; | |
39 | unsigned long irq_requested_type; | |
40 | int irq_source_id; | |
41 | int flags; | |
42 | struct pci_dev *dev; | |
43 | struct kvm *kvm; | |
44 | spinlock_t intx_lock; | |
45 | spinlock_t intx_mask_lock; | |
46 | char irq_name[32]; | |
47 | struct pci_saved_state *pci_saved_state; | |
48 | }; | |
bfd99ff5 AK |
49 | |
50 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | |
51 | int assigned_dev_id) | |
52 | { | |
53 | struct list_head *ptr; | |
54 | struct kvm_assigned_dev_kernel *match; | |
55 | ||
56 | list_for_each(ptr, head) { | |
57 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | |
58 | if (match->assigned_dev_id == assigned_dev_id) | |
59 | return match; | |
60 | } | |
61 | return NULL; | |
62 | } | |
63 | ||
64 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |
65 | *assigned_dev, int irq) | |
66 | { | |
67 | int i, index; | |
68 | struct msix_entry *host_msix_entries; | |
69 | ||
70 | host_msix_entries = assigned_dev->host_msix_entries; | |
71 | ||
72 | index = -1; | |
73 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
74 | if (irq == host_msix_entries[i].vector) { | |
75 | index = i; | |
76 | break; | |
77 | } | |
b93a3553 | 78 | if (index < 0) |
bfd99ff5 | 79 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); |
bfd99ff5 AK |
80 | |
81 | return index; | |
82 | } | |
83 | ||
07700a94 | 84 | static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) |
bfd99ff5 | 85 | { |
0645211c | 86 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
07700a94 | 87 | int ret; |
bfd99ff5 | 88 | |
07700a94 JK |
89 | spin_lock(&assigned_dev->intx_lock); |
90 | if (pci_check_and_mask_intx(assigned_dev->dev)) { | |
91 | assigned_dev->host_irq_disabled = true; | |
92 | ret = IRQ_WAKE_THREAD; | |
93 | } else | |
94 | ret = IRQ_NONE; | |
95 | spin_unlock(&assigned_dev->intx_lock); | |
96 | ||
97 | return ret; | |
98 | } | |
99 | ||
100 | static void | |
101 | kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, | |
102 | int vector) | |
103 | { | |
104 | if (unlikely(assigned_dev->irq_requested_type & | |
105 | KVM_DEV_IRQ_GUEST_INTX)) { | |
cf9eeac4 | 106 | spin_lock(&assigned_dev->intx_mask_lock); |
07700a94 JK |
107 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) |
108 | kvm_set_irq(assigned_dev->kvm, | |
aa2fbe6d YZ |
109 | assigned_dev->irq_source_id, vector, 1, |
110 | false); | |
cf9eeac4 | 111 | spin_unlock(&assigned_dev->intx_mask_lock); |
07700a94 JK |
112 | } else |
113 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | |
aa2fbe6d | 114 | vector, 1, false); |
07700a94 JK |
115 | } |
116 | ||
117 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | |
118 | { | |
119 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
120 | ||
121 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | |
122 | spin_lock_irq(&assigned_dev->intx_lock); | |
0645211c JK |
123 | disable_irq_nosync(irq); |
124 | assigned_dev->host_irq_disabled = true; | |
07700a94 | 125 | spin_unlock_irq(&assigned_dev->intx_lock); |
0645211c | 126 | } |
bfd99ff5 | 127 | |
07700a94 JK |
128 | kvm_assigned_dev_raise_guest_irq(assigned_dev, |
129 | assigned_dev->guest_irq); | |
130 | ||
131 | return IRQ_HANDLED; | |
132 | } | |
133 | ||
134 | #ifdef __KVM_HAVE_MSI | |
78c63440 MT |
135 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) |
136 | { | |
137 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
138 | int ret = kvm_set_irq_inatomic(assigned_dev->kvm, | |
139 | assigned_dev->irq_source_id, | |
140 | assigned_dev->guest_irq, 1); | |
141 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | |
142 | } | |
143 | ||
07700a94 JK |
144 | static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) |
145 | { | |
146 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
147 | ||
148 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | |
149 | assigned_dev->guest_irq); | |
cc079396 JK |
150 | |
151 | return IRQ_HANDLED; | |
152 | } | |
07700a94 | 153 | #endif |
cc079396 JK |
154 | |
155 | #ifdef __KVM_HAVE_MSIX | |
78c63440 MT |
156 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) |
157 | { | |
158 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
159 | int index = find_index_from_host_irq(assigned_dev, irq); | |
160 | u32 vector; | |
161 | int ret = 0; | |
162 | ||
163 | if (index >= 0) { | |
164 | vector = assigned_dev->guest_msix_entries[index].vector; | |
165 | ret = kvm_set_irq_inatomic(assigned_dev->kvm, | |
166 | assigned_dev->irq_source_id, | |
167 | vector, 1); | |
168 | } | |
169 | ||
170 | return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; | |
171 | } | |
172 | ||
cc079396 JK |
173 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) |
174 | { | |
175 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
176 | int index = find_index_from_host_irq(assigned_dev, irq); | |
177 | u32 vector; | |
178 | ||
179 | if (index >= 0) { | |
180 | vector = assigned_dev->guest_msix_entries[index].vector; | |
07700a94 | 181 | kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); |
cc079396 | 182 | } |
bfd99ff5 | 183 | |
bfd99ff5 AK |
184 | return IRQ_HANDLED; |
185 | } | |
cc079396 | 186 | #endif |
bfd99ff5 AK |
187 | |
188 | /* Ack the irq line for an assigned device */ | |
189 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |
190 | { | |
c61fa9d6 JK |
191 | struct kvm_assigned_dev_kernel *dev = |
192 | container_of(kian, struct kvm_assigned_dev_kernel, | |
193 | ack_notifier); | |
bfd99ff5 | 194 | |
aa2fbe6d | 195 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); |
bfd99ff5 | 196 | |
cf9eeac4 | 197 | spin_lock(&dev->intx_mask_lock); |
07700a94 JK |
198 | |
199 | if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { | |
200 | bool reassert = false; | |
201 | ||
202 | spin_lock_irq(&dev->intx_lock); | |
203 | /* | |
204 | * The guest IRQ may be shared so this ack can come from an | |
205 | * IRQ for another guest device. | |
206 | */ | |
207 | if (dev->host_irq_disabled) { | |
208 | if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) | |
209 | enable_irq(dev->host_irq); | |
210 | else if (!pci_check_and_unmask_intx(dev->dev)) | |
211 | reassert = true; | |
212 | dev->host_irq_disabled = reassert; | |
213 | } | |
214 | spin_unlock_irq(&dev->intx_lock); | |
215 | ||
216 | if (reassert) | |
217 | kvm_set_irq(dev->kvm, dev->irq_source_id, | |
aa2fbe6d | 218 | dev->guest_irq, 1, false); |
bfd99ff5 | 219 | } |
07700a94 | 220 | |
cf9eeac4 | 221 | spin_unlock(&dev->intx_mask_lock); |
bfd99ff5 AK |
222 | } |
223 | ||
224 | static void deassign_guest_irq(struct kvm *kvm, | |
225 | struct kvm_assigned_dev_kernel *assigned_dev) | |
226 | { | |
c61fa9d6 JK |
227 | if (assigned_dev->ack_notifier.gsi != -1) |
228 | kvm_unregister_irq_ack_notifier(kvm, | |
229 | &assigned_dev->ack_notifier); | |
bfd99ff5 | 230 | |
0c106b5a | 231 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
aa2fbe6d | 232 | assigned_dev->guest_irq, 0, false); |
0c106b5a | 233 | |
bfd99ff5 AK |
234 | if (assigned_dev->irq_source_id != -1) |
235 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | |
236 | assigned_dev->irq_source_id = -1; | |
237 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | |
238 | } | |
239 | ||
240 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | |
241 | static void deassign_host_irq(struct kvm *kvm, | |
242 | struct kvm_assigned_dev_kernel *assigned_dev) | |
243 | { | |
244 | /* | |
0645211c | 245 | * We disable irq here to prevent further events. |
bfd99ff5 AK |
246 | * |
247 | * Notice this maybe result in nested disable if the interrupt type is | |
248 | * INTx, but it's OK for we are going to free it. | |
249 | * | |
250 | * If this function is a part of VM destroy, please ensure that till | |
251 | * now, the kvm state is still legal for probably we also have to wait | |
0645211c | 252 | * on a currently running IRQ handler. |
bfd99ff5 AK |
253 | */ |
254 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | |
255 | int i; | |
256 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
0645211c | 257 | disable_irq(assigned_dev->host_msix_entries[i].vector); |
bfd99ff5 AK |
258 | |
259 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
260 | free_irq(assigned_dev->host_msix_entries[i].vector, | |
9f9f6b78 | 261 | assigned_dev); |
bfd99ff5 AK |
262 | |
263 | assigned_dev->entries_nr = 0; | |
264 | kfree(assigned_dev->host_msix_entries); | |
265 | kfree(assigned_dev->guest_msix_entries); | |
266 | pci_disable_msix(assigned_dev->dev); | |
267 | } else { | |
268 | /* Deal with MSI and INTx */ | |
07700a94 JK |
269 | if ((assigned_dev->irq_requested_type & |
270 | KVM_DEV_IRQ_HOST_INTX) && | |
271 | (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | |
272 | spin_lock_irq(&assigned_dev->intx_lock); | |
273 | pci_intx(assigned_dev->dev, false); | |
274 | spin_unlock_irq(&assigned_dev->intx_lock); | |
275 | synchronize_irq(assigned_dev->host_irq); | |
276 | } else | |
277 | disable_irq(assigned_dev->host_irq); | |
bfd99ff5 | 278 | |
9f9f6b78 | 279 | free_irq(assigned_dev->host_irq, assigned_dev); |
bfd99ff5 AK |
280 | |
281 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | |
282 | pci_disable_msi(assigned_dev->dev); | |
283 | } | |
284 | ||
285 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | |
286 | } | |
287 | ||
288 | static int kvm_deassign_irq(struct kvm *kvm, | |
289 | struct kvm_assigned_dev_kernel *assigned_dev, | |
290 | unsigned long irq_requested_type) | |
291 | { | |
292 | unsigned long guest_irq_type, host_irq_type; | |
293 | ||
294 | if (!irqchip_in_kernel(kvm)) | |
295 | return -EINVAL; | |
296 | /* no irq assignment to deassign */ | |
297 | if (!assigned_dev->irq_requested_type) | |
298 | return -ENXIO; | |
299 | ||
300 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | |
301 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | |
302 | ||
303 | if (host_irq_type) | |
304 | deassign_host_irq(kvm, assigned_dev); | |
305 | if (guest_irq_type) | |
306 | deassign_guest_irq(kvm, assigned_dev); | |
307 | ||
308 | return 0; | |
309 | } | |
310 | ||
311 | static void kvm_free_assigned_irq(struct kvm *kvm, | |
312 | struct kvm_assigned_dev_kernel *assigned_dev) | |
313 | { | |
314 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | |
315 | } | |
316 | ||
317 | static void kvm_free_assigned_device(struct kvm *kvm, | |
318 | struct kvm_assigned_dev_kernel | |
319 | *assigned_dev) | |
320 | { | |
321 | kvm_free_assigned_irq(kvm, assigned_dev); | |
322 | ||
f8fcfd77 AW |
323 | pci_reset_function(assigned_dev->dev); |
324 | if (pci_load_and_free_saved_state(assigned_dev->dev, | |
325 | &assigned_dev->pci_saved_state)) | |
326 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | |
327 | __func__, dev_name(&assigned_dev->dev->dev)); | |
328 | else | |
329 | pci_restore_state(assigned_dev->dev); | |
bfd99ff5 | 330 | |
ad0d217c | 331 | pci_clear_dev_assigned(assigned_dev->dev); |
6777829c | 332 | |
bfd99ff5 AK |
333 | pci_release_regions(assigned_dev->dev); |
334 | pci_disable_device(assigned_dev->dev); | |
335 | pci_dev_put(assigned_dev->dev); | |
336 | ||
337 | list_del(&assigned_dev->list); | |
338 | kfree(assigned_dev); | |
339 | } | |
340 | ||
341 | void kvm_free_all_assigned_devices(struct kvm *kvm) | |
342 | { | |
343 | struct list_head *ptr, *ptr2; | |
344 | struct kvm_assigned_dev_kernel *assigned_dev; | |
345 | ||
346 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | |
347 | assigned_dev = list_entry(ptr, | |
348 | struct kvm_assigned_dev_kernel, | |
349 | list); | |
350 | ||
351 | kvm_free_assigned_device(kvm, assigned_dev); | |
352 | } | |
353 | } | |
354 | ||
355 | static int assigned_device_enable_host_intx(struct kvm *kvm, | |
356 | struct kvm_assigned_dev_kernel *dev) | |
357 | { | |
07700a94 JK |
358 | irq_handler_t irq_handler; |
359 | unsigned long flags; | |
360 | ||
bfd99ff5 | 361 | dev->host_irq = dev->dev->irq; |
07700a94 JK |
362 | |
363 | /* | |
364 | * We can only share the IRQ line with other host devices if we are | |
365 | * able to disable the IRQ source at device-level - independently of | |
366 | * the guest driver. Otherwise host devices may suffer from unbounded | |
367 | * IRQ latencies when the guest keeps the line asserted. | |
bfd99ff5 | 368 | */ |
07700a94 JK |
369 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { |
370 | irq_handler = kvm_assigned_dev_intx; | |
371 | flags = IRQF_SHARED; | |
372 | } else { | |
373 | irq_handler = NULL; | |
374 | flags = IRQF_ONESHOT; | |
375 | } | |
376 | if (request_threaded_irq(dev->host_irq, irq_handler, | |
377 | kvm_assigned_dev_thread_intx, flags, | |
378 | dev->irq_name, dev)) | |
bfd99ff5 | 379 | return -EIO; |
07700a94 JK |
380 | |
381 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | |
382 | spin_lock_irq(&dev->intx_lock); | |
383 | pci_intx(dev->dev, true); | |
384 | spin_unlock_irq(&dev->intx_lock); | |
385 | } | |
bfd99ff5 AK |
386 | return 0; |
387 | } | |
388 | ||
389 | #ifdef __KVM_HAVE_MSI | |
390 | static int assigned_device_enable_host_msi(struct kvm *kvm, | |
391 | struct kvm_assigned_dev_kernel *dev) | |
392 | { | |
393 | int r; | |
394 | ||
395 | if (!dev->dev->msi_enabled) { | |
396 | r = pci_enable_msi(dev->dev); | |
397 | if (r) | |
398 | return r; | |
399 | } | |
400 | ||
401 | dev->host_irq = dev->dev->irq; | |
a76beb14 | 402 | if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, |
07700a94 JK |
403 | kvm_assigned_dev_thread_msi, 0, |
404 | dev->irq_name, dev)) { | |
bfd99ff5 AK |
405 | pci_disable_msi(dev->dev); |
406 | return -EIO; | |
407 | } | |
408 | ||
409 | return 0; | |
410 | } | |
411 | #endif | |
412 | ||
413 | #ifdef __KVM_HAVE_MSIX | |
414 | static int assigned_device_enable_host_msix(struct kvm *kvm, | |
415 | struct kvm_assigned_dev_kernel *dev) | |
416 | { | |
417 | int i, r = -EINVAL; | |
418 | ||
419 | /* host_msix_entries and guest_msix_entries should have been | |
420 | * initialized */ | |
421 | if (dev->entries_nr == 0) | |
422 | return r; | |
423 | ||
e8e249d7 AG |
424 | r = pci_enable_msix_exact(dev->dev, |
425 | dev->host_msix_entries, dev->entries_nr); | |
bfd99ff5 AK |
426 | if (r) |
427 | return r; | |
428 | ||
429 | for (i = 0; i < dev->entries_nr; i++) { | |
0645211c | 430 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
a76beb14 AW |
431 | kvm_assigned_dev_msix, |
432 | kvm_assigned_dev_thread_msix, | |
9f9f6b78 | 433 | 0, dev->irq_name, dev); |
bfd99ff5 | 434 | if (r) |
d57e2c07 | 435 | goto err; |
bfd99ff5 AK |
436 | } |
437 | ||
438 | return 0; | |
d57e2c07 | 439 | err: |
440 | for (i -= 1; i >= 0; i--) | |
9f9f6b78 | 441 | free_irq(dev->host_msix_entries[i].vector, dev); |
d57e2c07 | 442 | pci_disable_msix(dev->dev); |
443 | return r; | |
bfd99ff5 AK |
444 | } |
445 | ||
446 | #endif | |
447 | ||
448 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | |
449 | struct kvm_assigned_dev_kernel *dev, | |
450 | struct kvm_assigned_irq *irq) | |
451 | { | |
452 | dev->guest_irq = irq->guest_irq; | |
453 | dev->ack_notifier.gsi = irq->guest_irq; | |
454 | return 0; | |
455 | } | |
456 | ||
457 | #ifdef __KVM_HAVE_MSI | |
458 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | |
459 | struct kvm_assigned_dev_kernel *dev, | |
460 | struct kvm_assigned_irq *irq) | |
461 | { | |
462 | dev->guest_irq = irq->guest_irq; | |
463 | dev->ack_notifier.gsi = -1; | |
bfd99ff5 AK |
464 | return 0; |
465 | } | |
466 | #endif | |
467 | ||
468 | #ifdef __KVM_HAVE_MSIX | |
469 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | |
470 | struct kvm_assigned_dev_kernel *dev, | |
471 | struct kvm_assigned_irq *irq) | |
472 | { | |
473 | dev->guest_irq = irq->guest_irq; | |
474 | dev->ack_notifier.gsi = -1; | |
bfd99ff5 AK |
475 | return 0; |
476 | } | |
477 | #endif | |
478 | ||
479 | static int assign_host_irq(struct kvm *kvm, | |
480 | struct kvm_assigned_dev_kernel *dev, | |
481 | __u32 host_irq_type) | |
482 | { | |
483 | int r = -EEXIST; | |
484 | ||
485 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | |
486 | return r; | |
487 | ||
1e001d49 JK |
488 | snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", |
489 | pci_name(dev->dev)); | |
490 | ||
bfd99ff5 AK |
491 | switch (host_irq_type) { |
492 | case KVM_DEV_IRQ_HOST_INTX: | |
493 | r = assigned_device_enable_host_intx(kvm, dev); | |
494 | break; | |
495 | #ifdef __KVM_HAVE_MSI | |
496 | case KVM_DEV_IRQ_HOST_MSI: | |
497 | r = assigned_device_enable_host_msi(kvm, dev); | |
498 | break; | |
499 | #endif | |
500 | #ifdef __KVM_HAVE_MSIX | |
501 | case KVM_DEV_IRQ_HOST_MSIX: | |
502 | r = assigned_device_enable_host_msix(kvm, dev); | |
503 | break; | |
504 | #endif | |
505 | default: | |
506 | r = -EINVAL; | |
507 | } | |
07700a94 | 508 | dev->host_irq_disabled = false; |
bfd99ff5 AK |
509 | |
510 | if (!r) | |
511 | dev->irq_requested_type |= host_irq_type; | |
512 | ||
513 | return r; | |
514 | } | |
515 | ||
516 | static int assign_guest_irq(struct kvm *kvm, | |
517 | struct kvm_assigned_dev_kernel *dev, | |
518 | struct kvm_assigned_irq *irq, | |
519 | unsigned long guest_irq_type) | |
520 | { | |
521 | int id; | |
522 | int r = -EEXIST; | |
523 | ||
524 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | |
525 | return r; | |
526 | ||
527 | id = kvm_request_irq_source_id(kvm); | |
528 | if (id < 0) | |
529 | return id; | |
530 | ||
531 | dev->irq_source_id = id; | |
532 | ||
533 | switch (guest_irq_type) { | |
534 | case KVM_DEV_IRQ_GUEST_INTX: | |
535 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | |
536 | break; | |
537 | #ifdef __KVM_HAVE_MSI | |
538 | case KVM_DEV_IRQ_GUEST_MSI: | |
539 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | |
540 | break; | |
541 | #endif | |
542 | #ifdef __KVM_HAVE_MSIX | |
543 | case KVM_DEV_IRQ_GUEST_MSIX: | |
544 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | |
545 | break; | |
546 | #endif | |
547 | default: | |
548 | r = -EINVAL; | |
549 | } | |
550 | ||
551 | if (!r) { | |
552 | dev->irq_requested_type |= guest_irq_type; | |
c61fa9d6 JK |
553 | if (dev->ack_notifier.gsi != -1) |
554 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | |
30d1e0e8 | 555 | } else { |
bfd99ff5 | 556 | kvm_free_irq_source_id(kvm, dev->irq_source_id); |
30d1e0e8 CG |
557 | dev->irq_source_id = -1; |
558 | } | |
bfd99ff5 AK |
559 | |
560 | return r; | |
561 | } | |
562 | ||
563 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | |
564 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | |
565 | struct kvm_assigned_irq *assigned_irq) | |
566 | { | |
567 | int r = -EINVAL; | |
568 | struct kvm_assigned_dev_kernel *match; | |
569 | unsigned long host_irq_type, guest_irq_type; | |
570 | ||
bfd99ff5 AK |
571 | if (!irqchip_in_kernel(kvm)) |
572 | return r; | |
573 | ||
574 | mutex_lock(&kvm->lock); | |
575 | r = -ENODEV; | |
576 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
577 | assigned_irq->assigned_dev_id); | |
578 | if (!match) | |
579 | goto out; | |
580 | ||
581 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | |
582 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | |
583 | ||
584 | r = -EINVAL; | |
585 | /* can only assign one type at a time */ | |
586 | if (hweight_long(host_irq_type) > 1) | |
587 | goto out; | |
588 | if (hweight_long(guest_irq_type) > 1) | |
589 | goto out; | |
590 | if (host_irq_type == 0 && guest_irq_type == 0) | |
591 | goto out; | |
592 | ||
593 | r = 0; | |
594 | if (host_irq_type) | |
595 | r = assign_host_irq(kvm, match, host_irq_type); | |
596 | if (r) | |
597 | goto out; | |
598 | ||
599 | if (guest_irq_type) | |
600 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | |
601 | out: | |
602 | mutex_unlock(&kvm->lock); | |
603 | return r; | |
604 | } | |
605 | ||
606 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | |
607 | struct kvm_assigned_irq | |
608 | *assigned_irq) | |
609 | { | |
610 | int r = -ENODEV; | |
611 | struct kvm_assigned_dev_kernel *match; | |
07700a94 | 612 | unsigned long irq_type; |
bfd99ff5 AK |
613 | |
614 | mutex_lock(&kvm->lock); | |
615 | ||
616 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
617 | assigned_irq->assigned_dev_id); | |
618 | if (!match) | |
619 | goto out; | |
620 | ||
07700a94 JK |
621 | irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | |
622 | KVM_DEV_IRQ_GUEST_MASK); | |
623 | r = kvm_deassign_irq(kvm, match, irq_type); | |
bfd99ff5 AK |
624 | out: |
625 | mutex_unlock(&kvm->lock); | |
626 | return r; | |
627 | } | |
628 | ||
3d27e23b AW |
629 | /* |
630 | * We want to test whether the caller has been granted permissions to | |
631 | * use this device. To be able to configure and control the device, | |
632 | * the user needs access to PCI configuration space and BAR resources. | |
633 | * These are accessed through PCI sysfs. PCI config space is often | |
634 | * passed to the process calling this ioctl via file descriptor, so we | |
635 | * can't rely on access to that file. We can check for permissions | |
636 | * on each of the BAR resource files, which is a pretty clear | |
637 | * indicator that the user has been granted access to the device. | |
638 | */ | |
639 | static int probe_sysfs_permissions(struct pci_dev *dev) | |
640 | { | |
641 | #ifdef CONFIG_SYSFS | |
642 | int i; | |
643 | bool bar_found = false; | |
644 | ||
645 | for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { | |
646 | char *kpath, *syspath; | |
647 | struct path path; | |
648 | struct inode *inode; | |
649 | int r; | |
650 | ||
651 | if (!pci_resource_len(dev, i)) | |
652 | continue; | |
653 | ||
654 | kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); | |
655 | if (!kpath) | |
656 | return -ENOMEM; | |
657 | ||
658 | /* Per sysfs-rules, sysfs is always at /sys */ | |
659 | syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); | |
660 | kfree(kpath); | |
661 | if (!syspath) | |
662 | return -ENOMEM; | |
663 | ||
664 | r = kern_path(syspath, LOOKUP_FOLLOW, &path); | |
665 | kfree(syspath); | |
666 | if (r) | |
667 | return r; | |
668 | ||
bb668734 | 669 | inode = d_backing_inode(path.dentry); |
3d27e23b AW |
670 | |
671 | r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); | |
672 | path_put(&path); | |
673 | if (r) | |
674 | return r; | |
675 | ||
676 | bar_found = true; | |
677 | } | |
678 | ||
679 | /* If no resources, probably something special */ | |
680 | if (!bar_found) | |
681 | return -EPERM; | |
682 | ||
683 | return 0; | |
684 | #else | |
685 | return -EINVAL; /* No way to control the device without sysfs */ | |
686 | #endif | |
687 | } | |
688 | ||
bfd99ff5 AK |
689 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, |
690 | struct kvm_assigned_pci_dev *assigned_dev) | |
691 | { | |
bc6678a3 | 692 | int r = 0, idx; |
bfd99ff5 AK |
693 | struct kvm_assigned_dev_kernel *match; |
694 | struct pci_dev *dev; | |
695 | ||
42387373 AW |
696 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) |
697 | return -EINVAL; | |
698 | ||
bfd99ff5 | 699 | mutex_lock(&kvm->lock); |
bc6678a3 | 700 | idx = srcu_read_lock(&kvm->srcu); |
bfd99ff5 AK |
701 | |
702 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
703 | assigned_dev->assigned_dev_id); | |
704 | if (match) { | |
705 | /* device already assigned */ | |
706 | r = -EEXIST; | |
707 | goto out; | |
708 | } | |
709 | ||
710 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | |
711 | if (match == NULL) { | |
712 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | |
713 | __func__); | |
714 | r = -ENOMEM; | |
715 | goto out; | |
716 | } | |
ab9f4ecb ZE |
717 | dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, |
718 | assigned_dev->busnr, | |
bfd99ff5 AK |
719 | assigned_dev->devfn); |
720 | if (!dev) { | |
721 | printk(KERN_INFO "%s: host device not found\n", __func__); | |
722 | r = -EINVAL; | |
723 | goto out_free; | |
724 | } | |
3d27e23b AW |
725 | |
726 | /* Don't allow bridges to be assigned */ | |
f961f728 | 727 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { |
3d27e23b AW |
728 | r = -EPERM; |
729 | goto out_put; | |
730 | } | |
731 | ||
732 | r = probe_sysfs_permissions(dev); | |
733 | if (r) | |
734 | goto out_put; | |
735 | ||
bfd99ff5 AK |
736 | if (pci_enable_device(dev)) { |
737 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | |
738 | r = -EBUSY; | |
739 | goto out_put; | |
740 | } | |
741 | r = pci_request_regions(dev, "kvm_assigned_device"); | |
742 | if (r) { | |
743 | printk(KERN_INFO "%s: Could not get access to device regions\n", | |
744 | __func__); | |
745 | goto out_disable; | |
746 | } | |
747 | ||
748 | pci_reset_function(dev); | |
ed78661f | 749 | pci_save_state(dev); |
f8fcfd77 AW |
750 | match->pci_saved_state = pci_store_saved_state(dev); |
751 | if (!match->pci_saved_state) | |
752 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | |
753 | __func__, dev_name(&dev->dev)); | |
07700a94 JK |
754 | |
755 | if (!pci_intx_mask_supported(dev)) | |
756 | assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; | |
757 | ||
bfd99ff5 | 758 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
ab9f4ecb | 759 | match->host_segnr = assigned_dev->segnr; |
bfd99ff5 AK |
760 | match->host_busnr = assigned_dev->busnr; |
761 | match->host_devfn = assigned_dev->devfn; | |
762 | match->flags = assigned_dev->flags; | |
763 | match->dev = dev; | |
0645211c | 764 | spin_lock_init(&match->intx_lock); |
cf9eeac4 | 765 | spin_lock_init(&match->intx_mask_lock); |
bfd99ff5 AK |
766 | match->irq_source_id = -1; |
767 | match->kvm = kvm; | |
768 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | |
bfd99ff5 AK |
769 | |
770 | list_add(&match->list, &kvm->arch.assigned_dev_head); | |
771 | ||
42387373 AW |
772 | if (!kvm->arch.iommu_domain) { |
773 | r = kvm_iommu_map_guest(kvm); | |
bfd99ff5 AK |
774 | if (r) |
775 | goto out_list_del; | |
776 | } | |
c9eab58f | 777 | r = kvm_assign_device(kvm, match->dev); |
42387373 AW |
778 | if (r) |
779 | goto out_list_del; | |
bfd99ff5 AK |
780 | |
781 | out: | |
bc6678a3 | 782 | srcu_read_unlock(&kvm->srcu, idx); |
fae3a353 | 783 | mutex_unlock(&kvm->lock); |
bfd99ff5 AK |
784 | return r; |
785 | out_list_del: | |
f8fcfd77 AW |
786 | if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) |
787 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | |
788 | __func__, dev_name(&dev->dev)); | |
bfd99ff5 AK |
789 | list_del(&match->list); |
790 | pci_release_regions(dev); | |
791 | out_disable: | |
792 | pci_disable_device(dev); | |
793 | out_put: | |
794 | pci_dev_put(dev); | |
795 | out_free: | |
796 | kfree(match); | |
bc6678a3 | 797 | srcu_read_unlock(&kvm->srcu, idx); |
fae3a353 | 798 | mutex_unlock(&kvm->lock); |
bfd99ff5 AK |
799 | return r; |
800 | } | |
801 | ||
802 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | |
803 | struct kvm_assigned_pci_dev *assigned_dev) | |
804 | { | |
805 | int r = 0; | |
806 | struct kvm_assigned_dev_kernel *match; | |
807 | ||
808 | mutex_lock(&kvm->lock); | |
809 | ||
810 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
811 | assigned_dev->assigned_dev_id); | |
812 | if (!match) { | |
813 | printk(KERN_INFO "%s: device hasn't been assigned before, " | |
814 | "so cannot be deassigned\n", __func__); | |
815 | r = -EINVAL; | |
816 | goto out; | |
817 | } | |
818 | ||
c9eab58f | 819 | kvm_deassign_device(kvm, match->dev); |
bfd99ff5 AK |
820 | |
821 | kvm_free_assigned_device(kvm, match); | |
822 | ||
823 | out: | |
824 | mutex_unlock(&kvm->lock); | |
825 | return r; | |
826 | } | |
827 | ||
828 | ||
829 | #ifdef __KVM_HAVE_MSIX | |
830 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | |
831 | struct kvm_assigned_msix_nr *entry_nr) | |
832 | { | |
833 | int r = 0; | |
834 | struct kvm_assigned_dev_kernel *adev; | |
835 | ||
836 | mutex_lock(&kvm->lock); | |
837 | ||
838 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
839 | entry_nr->assigned_dev_id); | |
840 | if (!adev) { | |
841 | r = -EINVAL; | |
842 | goto msix_nr_out; | |
843 | } | |
844 | ||
845 | if (adev->entries_nr == 0) { | |
846 | adev->entries_nr = entry_nr->entry_nr; | |
847 | if (adev->entries_nr == 0 || | |
9f3191ae | 848 | adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { |
bfd99ff5 AK |
849 | r = -EINVAL; |
850 | goto msix_nr_out; | |
851 | } | |
852 | ||
853 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | |
854 | entry_nr->entry_nr, | |
855 | GFP_KERNEL); | |
856 | if (!adev->host_msix_entries) { | |
857 | r = -ENOMEM; | |
858 | goto msix_nr_out; | |
859 | } | |
0645211c JK |
860 | adev->guest_msix_entries = |
861 | kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, | |
862 | GFP_KERNEL); | |
bfd99ff5 AK |
863 | if (!adev->guest_msix_entries) { |
864 | kfree(adev->host_msix_entries); | |
865 | r = -ENOMEM; | |
866 | goto msix_nr_out; | |
867 | } | |
868 | } else /* Not allowed set MSI-X number twice */ | |
869 | r = -EINVAL; | |
870 | msix_nr_out: | |
871 | mutex_unlock(&kvm->lock); | |
872 | return r; | |
873 | } | |
874 | ||
875 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | |
876 | struct kvm_assigned_msix_entry *entry) | |
877 | { | |
878 | int r = 0, i; | |
879 | struct kvm_assigned_dev_kernel *adev; | |
880 | ||
881 | mutex_lock(&kvm->lock); | |
882 | ||
883 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
884 | entry->assigned_dev_id); | |
885 | ||
886 | if (!adev) { | |
887 | r = -EINVAL; | |
888 | goto msix_entry_out; | |
889 | } | |
890 | ||
891 | for (i = 0; i < adev->entries_nr; i++) | |
892 | if (adev->guest_msix_entries[i].vector == 0 || | |
893 | adev->guest_msix_entries[i].entry == entry->entry) { | |
894 | adev->guest_msix_entries[i].entry = entry->entry; | |
895 | adev->guest_msix_entries[i].vector = entry->gsi; | |
896 | adev->host_msix_entries[i].entry = entry->entry; | |
897 | break; | |
898 | } | |
899 | if (i == adev->entries_nr) { | |
900 | r = -ENOSPC; | |
901 | goto msix_entry_out; | |
902 | } | |
903 | ||
904 | msix_entry_out: | |
905 | mutex_unlock(&kvm->lock); | |
906 | ||
907 | return r; | |
908 | } | |
909 | #endif | |
910 | ||
07700a94 JK |
911 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, |
912 | struct kvm_assigned_pci_dev *assigned_dev) | |
913 | { | |
914 | int r = 0; | |
915 | struct kvm_assigned_dev_kernel *match; | |
916 | ||
917 | mutex_lock(&kvm->lock); | |
918 | ||
919 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
920 | assigned_dev->assigned_dev_id); | |
921 | if (!match) { | |
922 | r = -ENODEV; | |
923 | goto out; | |
924 | } | |
925 | ||
cf9eeac4 | 926 | spin_lock(&match->intx_mask_lock); |
07700a94 JK |
927 | |
928 | match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; | |
929 | match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; | |
930 | ||
931 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | |
932 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { | |
933 | kvm_set_irq(match->kvm, match->irq_source_id, | |
aa2fbe6d | 934 | match->guest_irq, 0, false); |
07700a94 JK |
935 | /* |
936 | * Masking at hardware-level is performed on demand, | |
937 | * i.e. when an IRQ actually arrives at the host. | |
938 | */ | |
939 | } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | |
940 | /* | |
941 | * Unmask the IRQ line if required. Unmasking at | |
942 | * device level will be performed by user space. | |
943 | */ | |
944 | spin_lock_irq(&match->intx_lock); | |
945 | if (match->host_irq_disabled) { | |
946 | enable_irq(match->host_irq); | |
947 | match->host_irq_disabled = false; | |
948 | } | |
949 | spin_unlock_irq(&match->intx_lock); | |
950 | } | |
951 | } | |
952 | ||
cf9eeac4 | 953 | spin_unlock(&match->intx_mask_lock); |
07700a94 JK |
954 | |
955 | out: | |
956 | mutex_unlock(&kvm->lock); | |
957 | return r; | |
958 | } | |
959 | ||
bfd99ff5 AK |
960 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
961 | unsigned long arg) | |
962 | { | |
963 | void __user *argp = (void __user *)arg; | |
51de271d | 964 | int r; |
bfd99ff5 AK |
965 | |
966 | switch (ioctl) { | |
967 | case KVM_ASSIGN_PCI_DEVICE: { | |
968 | struct kvm_assigned_pci_dev assigned_dev; | |
969 | ||
970 | r = -EFAULT; | |
971 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | |
972 | goto out; | |
973 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | |
974 | if (r) | |
975 | goto out; | |
976 | break; | |
977 | } | |
978 | case KVM_ASSIGN_IRQ: { | |
979 | r = -EOPNOTSUPP; | |
980 | break; | |
981 | } | |
bfd99ff5 AK |
982 | case KVM_ASSIGN_DEV_IRQ: { |
983 | struct kvm_assigned_irq assigned_irq; | |
984 | ||
985 | r = -EFAULT; | |
986 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | |
987 | goto out; | |
988 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | |
989 | if (r) | |
990 | goto out; | |
991 | break; | |
992 | } | |
993 | case KVM_DEASSIGN_DEV_IRQ: { | |
994 | struct kvm_assigned_irq assigned_irq; | |
995 | ||
996 | r = -EFAULT; | |
997 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | |
998 | goto out; | |
999 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | |
1000 | if (r) | |
1001 | goto out; | |
1002 | break; | |
1003 | } | |
bfd99ff5 AK |
1004 | case KVM_DEASSIGN_PCI_DEVICE: { |
1005 | struct kvm_assigned_pci_dev assigned_dev; | |
1006 | ||
1007 | r = -EFAULT; | |
1008 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | |
1009 | goto out; | |
1010 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | |
1011 | if (r) | |
1012 | goto out; | |
1013 | break; | |
1014 | } | |
bfd99ff5 AK |
1015 | #ifdef __KVM_HAVE_MSIX |
1016 | case KVM_ASSIGN_SET_MSIX_NR: { | |
1017 | struct kvm_assigned_msix_nr entry_nr; | |
1018 | r = -EFAULT; | |
1019 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | |
1020 | goto out; | |
1021 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | |
1022 | if (r) | |
1023 | goto out; | |
1024 | break; | |
1025 | } | |
1026 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | |
1027 | struct kvm_assigned_msix_entry entry; | |
1028 | r = -EFAULT; | |
1029 | if (copy_from_user(&entry, argp, sizeof entry)) | |
1030 | goto out; | |
1031 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | |
1032 | if (r) | |
1033 | goto out; | |
1034 | break; | |
1035 | } | |
1036 | #endif | |
07700a94 JK |
1037 | case KVM_ASSIGN_SET_INTX_MASK: { |
1038 | struct kvm_assigned_pci_dev assigned_dev; | |
1039 | ||
1040 | r = -EFAULT; | |
1041 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | |
1042 | goto out; | |
1043 | r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); | |
1044 | break; | |
1045 | } | |
51de271d JK |
1046 | default: |
1047 | r = -ENOTTY; | |
1048 | break; | |
bfd99ff5 AK |
1049 | } |
1050 | out: | |
1051 | return r; | |
1052 | } |