Merge tag 'char-misc-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
[linux-2.6-block.git] / arch / x86 / kvm / assigned-dev.c
CommitLineData
bfd99ff5
AK
1/*
2 * Kernel-based Virtual Machine - device assignment support
3 *
221d059d 4 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
bfd99ff5
AK
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
8 *
9 */
10
11#include <linux/kvm_host.h>
12#include <linux/kvm.h>
13#include <linux/uaccess.h>
14#include <linux/vmalloc.h>
15#include <linux/errno.h>
16#include <linux/spinlock.h>
17#include <linux/pci.h>
18#include <linux/interrupt.h>
5a0e3ad6 19#include <linux/slab.h>
3d27e23b
AW
20#include <linux/namei.h>
21#include <linux/fs.h>
bfd99ff5 22#include "irq.h"
c9eab58f 23#include "assigned-dev.h"
8a22f234 24#include "trace/events/kvm.h"
c9eab58f
PB
25
26struct kvm_assigned_dev_kernel {
27 struct kvm_irq_ack_notifier ack_notifier;
28 struct list_head list;
29 int assigned_dev_id;
30 int host_segnr;
31 int host_busnr;
32 int host_devfn;
33 unsigned int entries_nr;
34 int host_irq;
35 bool host_irq_disabled;
36 bool pci_2_3;
37 struct msix_entry *host_msix_entries;
38 int guest_irq;
39 struct msix_entry *guest_msix_entries;
40 unsigned long irq_requested_type;
41 int irq_source_id;
42 int flags;
43 struct pci_dev *dev;
44 struct kvm *kvm;
45 spinlock_t intx_lock;
46 spinlock_t intx_mask_lock;
47 char irq_name[32];
48 struct pci_saved_state *pci_saved_state;
49};
bfd99ff5
AK
50
51static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
52 int assigned_dev_id)
53{
bfd99ff5
AK
54 struct kvm_assigned_dev_kernel *match;
55
652fc08d 56 list_for_each_entry(match, head, list) {
bfd99ff5
AK
57 if (match->assigned_dev_id == assigned_dev_id)
58 return match;
59 }
60 return NULL;
61}
62
63static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
64 *assigned_dev, int irq)
65{
66 int i, index;
67 struct msix_entry *host_msix_entries;
68
69 host_msix_entries = assigned_dev->host_msix_entries;
70
71 index = -1;
72 for (i = 0; i < assigned_dev->entries_nr; i++)
73 if (irq == host_msix_entries[i].vector) {
74 index = i;
75 break;
76 }
b93a3553 77 if (index < 0)
bfd99ff5 78 printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
bfd99ff5
AK
79
80 return index;
81}
82
07700a94 83static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
bfd99ff5 84{
0645211c 85 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
07700a94 86 int ret;
bfd99ff5 87
07700a94
JK
88 spin_lock(&assigned_dev->intx_lock);
89 if (pci_check_and_mask_intx(assigned_dev->dev)) {
90 assigned_dev->host_irq_disabled = true;
91 ret = IRQ_WAKE_THREAD;
92 } else
93 ret = IRQ_NONE;
94 spin_unlock(&assigned_dev->intx_lock);
95
96 return ret;
97}
98
99static void
100kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
101 int vector)
102{
103 if (unlikely(assigned_dev->irq_requested_type &
104 KVM_DEV_IRQ_GUEST_INTX)) {
cf9eeac4 105 spin_lock(&assigned_dev->intx_mask_lock);
07700a94
JK
106 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
107 kvm_set_irq(assigned_dev->kvm,
aa2fbe6d
YZ
108 assigned_dev->irq_source_id, vector, 1,
109 false);
cf9eeac4 110 spin_unlock(&assigned_dev->intx_mask_lock);
07700a94
JK
111 } else
112 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
aa2fbe6d 113 vector, 1, false);
07700a94
JK
114}
115
116static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
117{
118 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
119
120 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
121 spin_lock_irq(&assigned_dev->intx_lock);
0645211c
JK
122 disable_irq_nosync(irq);
123 assigned_dev->host_irq_disabled = true;
07700a94 124 spin_unlock_irq(&assigned_dev->intx_lock);
0645211c 125 }
bfd99ff5 126
07700a94
JK
127 kvm_assigned_dev_raise_guest_irq(assigned_dev,
128 assigned_dev->guest_irq);
129
130 return IRQ_HANDLED;
131}
132
8a22f234
PB
133/*
134 * Deliver an IRQ in an atomic context if we can, or return a failure,
135 * user can retry in a process context.
136 * Return value:
137 * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
138 * Other values - No need to retry.
139 */
140static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
141 int level)
142{
143 struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
144 struct kvm_kernel_irq_routing_entry *e;
145 int ret = -EINVAL;
146 int idx;
147
148 trace_kvm_set_irq(irq, level, irq_source_id);
149
150 /*
151 * Injection into either PIC or IOAPIC might need to scan all CPUs,
152 * which would need to be retried from thread context; when same GSI
153 * is connected to both PIC and IOAPIC, we'd have to report a
154 * partial failure here.
155 * Since there's no easy way to do this, we only support injecting MSI
156 * which is limited to 1:1 GSI mapping.
157 */
158 idx = srcu_read_lock(&kvm->irq_srcu);
159 if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
160 e = &entries[0];
161 ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
162 irq, level);
163 }
164 srcu_read_unlock(&kvm->irq_srcu, idx);
165 return ret;
166}
167
168
78c63440
MT
169static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
170{
171 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
172 int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
173 assigned_dev->irq_source_id,
174 assigned_dev->guest_irq, 1);
175 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
176}
177
07700a94
JK
178static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
179{
180 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
181
182 kvm_assigned_dev_raise_guest_irq(assigned_dev,
183 assigned_dev->guest_irq);
cc079396
JK
184
185 return IRQ_HANDLED;
186}
187
78c63440
MT
188static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
189{
190 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
191 int index = find_index_from_host_irq(assigned_dev, irq);
192 u32 vector;
193 int ret = 0;
194
195 if (index >= 0) {
196 vector = assigned_dev->guest_msix_entries[index].vector;
197 ret = kvm_set_irq_inatomic(assigned_dev->kvm,
198 assigned_dev->irq_source_id,
199 vector, 1);
200 }
201
202 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
203}
204
cc079396
JK
205static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
206{
207 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
208 int index = find_index_from_host_irq(assigned_dev, irq);
209 u32 vector;
210
211 if (index >= 0) {
212 vector = assigned_dev->guest_msix_entries[index].vector;
07700a94 213 kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
cc079396 214 }
bfd99ff5 215
bfd99ff5
AK
216 return IRQ_HANDLED;
217}
218
219/* Ack the irq line for an assigned device */
220static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
221{
c61fa9d6
JK
222 struct kvm_assigned_dev_kernel *dev =
223 container_of(kian, struct kvm_assigned_dev_kernel,
224 ack_notifier);
bfd99ff5 225
aa2fbe6d 226 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
bfd99ff5 227
cf9eeac4 228 spin_lock(&dev->intx_mask_lock);
07700a94
JK
229
230 if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
231 bool reassert = false;
232
233 spin_lock_irq(&dev->intx_lock);
234 /*
235 * The guest IRQ may be shared so this ack can come from an
236 * IRQ for another guest device.
237 */
238 if (dev->host_irq_disabled) {
239 if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
240 enable_irq(dev->host_irq);
241 else if (!pci_check_and_unmask_intx(dev->dev))
242 reassert = true;
243 dev->host_irq_disabled = reassert;
244 }
245 spin_unlock_irq(&dev->intx_lock);
246
247 if (reassert)
248 kvm_set_irq(dev->kvm, dev->irq_source_id,
aa2fbe6d 249 dev->guest_irq, 1, false);
bfd99ff5 250 }
07700a94 251
cf9eeac4 252 spin_unlock(&dev->intx_mask_lock);
bfd99ff5
AK
253}
254
255static void deassign_guest_irq(struct kvm *kvm,
256 struct kvm_assigned_dev_kernel *assigned_dev)
257{
c61fa9d6
JK
258 if (assigned_dev->ack_notifier.gsi != -1)
259 kvm_unregister_irq_ack_notifier(kvm,
260 &assigned_dev->ack_notifier);
bfd99ff5 261
0c106b5a 262 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
aa2fbe6d 263 assigned_dev->guest_irq, 0, false);
0c106b5a 264
bfd99ff5
AK
265 if (assigned_dev->irq_source_id != -1)
266 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
267 assigned_dev->irq_source_id = -1;
268 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
269}
270
271/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
272static void deassign_host_irq(struct kvm *kvm,
273 struct kvm_assigned_dev_kernel *assigned_dev)
274{
275 /*
0645211c 276 * We disable irq here to prevent further events.
bfd99ff5
AK
277 *
278 * Notice this maybe result in nested disable if the interrupt type is
279 * INTx, but it's OK for we are going to free it.
280 *
281 * If this function is a part of VM destroy, please ensure that till
282 * now, the kvm state is still legal for probably we also have to wait
0645211c 283 * on a currently running IRQ handler.
bfd99ff5
AK
284 */
285 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
286 int i;
287 for (i = 0; i < assigned_dev->entries_nr; i++)
0645211c 288 disable_irq(assigned_dev->host_msix_entries[i].vector);
bfd99ff5
AK
289
290 for (i = 0; i < assigned_dev->entries_nr; i++)
291 free_irq(assigned_dev->host_msix_entries[i].vector,
9f9f6b78 292 assigned_dev);
bfd99ff5
AK
293
294 assigned_dev->entries_nr = 0;
295 kfree(assigned_dev->host_msix_entries);
296 kfree(assigned_dev->guest_msix_entries);
297 pci_disable_msix(assigned_dev->dev);
298 } else {
299 /* Deal with MSI and INTx */
07700a94
JK
300 if ((assigned_dev->irq_requested_type &
301 KVM_DEV_IRQ_HOST_INTX) &&
302 (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
303 spin_lock_irq(&assigned_dev->intx_lock);
304 pci_intx(assigned_dev->dev, false);
305 spin_unlock_irq(&assigned_dev->intx_lock);
306 synchronize_irq(assigned_dev->host_irq);
307 } else
308 disable_irq(assigned_dev->host_irq);
bfd99ff5 309
9f9f6b78 310 free_irq(assigned_dev->host_irq, assigned_dev);
bfd99ff5
AK
311
312 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
313 pci_disable_msi(assigned_dev->dev);
314 }
315
316 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
317}
318
319static int kvm_deassign_irq(struct kvm *kvm,
320 struct kvm_assigned_dev_kernel *assigned_dev,
321 unsigned long irq_requested_type)
322{
323 unsigned long guest_irq_type, host_irq_type;
324
325 if (!irqchip_in_kernel(kvm))
326 return -EINVAL;
327 /* no irq assignment to deassign */
328 if (!assigned_dev->irq_requested_type)
329 return -ENXIO;
330
331 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
332 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
333
334 if (host_irq_type)
335 deassign_host_irq(kvm, assigned_dev);
336 if (guest_irq_type)
337 deassign_guest_irq(kvm, assigned_dev);
338
339 return 0;
340}
341
342static void kvm_free_assigned_irq(struct kvm *kvm,
343 struct kvm_assigned_dev_kernel *assigned_dev)
344{
345 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
346}
347
348static void kvm_free_assigned_device(struct kvm *kvm,
349 struct kvm_assigned_dev_kernel
350 *assigned_dev)
351{
352 kvm_free_assigned_irq(kvm, assigned_dev);
353
f8fcfd77
AW
354 pci_reset_function(assigned_dev->dev);
355 if (pci_load_and_free_saved_state(assigned_dev->dev,
356 &assigned_dev->pci_saved_state))
357 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
358 __func__, dev_name(&assigned_dev->dev->dev));
359 else
360 pci_restore_state(assigned_dev->dev);
bfd99ff5 361
ad0d217c 362 pci_clear_dev_assigned(assigned_dev->dev);
6777829c 363
bfd99ff5
AK
364 pci_release_regions(assigned_dev->dev);
365 pci_disable_device(assigned_dev->dev);
366 pci_dev_put(assigned_dev->dev);
367
368 list_del(&assigned_dev->list);
369 kfree(assigned_dev);
370}
371
372void kvm_free_all_assigned_devices(struct kvm *kvm)
373{
652fc08d 374 struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
bfd99ff5 375
652fc08d
GT
376 list_for_each_entry_safe(assigned_dev, tmp,
377 &kvm->arch.assigned_dev_head, list) {
bfd99ff5
AK
378 kvm_free_assigned_device(kvm, assigned_dev);
379 }
380}
381
382static int assigned_device_enable_host_intx(struct kvm *kvm,
383 struct kvm_assigned_dev_kernel *dev)
384{
07700a94
JK
385 irq_handler_t irq_handler;
386 unsigned long flags;
387
bfd99ff5 388 dev->host_irq = dev->dev->irq;
07700a94
JK
389
390 /*
391 * We can only share the IRQ line with other host devices if we are
392 * able to disable the IRQ source at device-level - independently of
393 * the guest driver. Otherwise host devices may suffer from unbounded
394 * IRQ latencies when the guest keeps the line asserted.
bfd99ff5 395 */
07700a94
JK
396 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
397 irq_handler = kvm_assigned_dev_intx;
398 flags = IRQF_SHARED;
399 } else {
400 irq_handler = NULL;
401 flags = IRQF_ONESHOT;
402 }
403 if (request_threaded_irq(dev->host_irq, irq_handler,
404 kvm_assigned_dev_thread_intx, flags,
405 dev->irq_name, dev))
bfd99ff5 406 return -EIO;
07700a94
JK
407
408 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
409 spin_lock_irq(&dev->intx_lock);
410 pci_intx(dev->dev, true);
411 spin_unlock_irq(&dev->intx_lock);
412 }
bfd99ff5
AK
413 return 0;
414}
415
bfd99ff5
AK
416static int assigned_device_enable_host_msi(struct kvm *kvm,
417 struct kvm_assigned_dev_kernel *dev)
418{
419 int r;
420
421 if (!dev->dev->msi_enabled) {
422 r = pci_enable_msi(dev->dev);
423 if (r)
424 return r;
425 }
426
427 dev->host_irq = dev->dev->irq;
a76beb14 428 if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
07700a94
JK
429 kvm_assigned_dev_thread_msi, 0,
430 dev->irq_name, dev)) {
bfd99ff5
AK
431 pci_disable_msi(dev->dev);
432 return -EIO;
433 }
434
435 return 0;
436}
bfd99ff5 437
bfd99ff5
AK
438static int assigned_device_enable_host_msix(struct kvm *kvm,
439 struct kvm_assigned_dev_kernel *dev)
440{
441 int i, r = -EINVAL;
442
443 /* host_msix_entries and guest_msix_entries should have been
444 * initialized */
445 if (dev->entries_nr == 0)
446 return r;
447
e8e249d7
AG
448 r = pci_enable_msix_exact(dev->dev,
449 dev->host_msix_entries, dev->entries_nr);
bfd99ff5
AK
450 if (r)
451 return r;
452
453 for (i = 0; i < dev->entries_nr; i++) {
0645211c 454 r = request_threaded_irq(dev->host_msix_entries[i].vector,
a76beb14
AW
455 kvm_assigned_dev_msix,
456 kvm_assigned_dev_thread_msix,
9f9f6b78 457 0, dev->irq_name, dev);
bfd99ff5 458 if (r)
d57e2c07 459 goto err;
bfd99ff5
AK
460 }
461
462 return 0;
d57e2c07 463err:
464 for (i -= 1; i >= 0; i--)
9f9f6b78 465 free_irq(dev->host_msix_entries[i].vector, dev);
d57e2c07 466 pci_disable_msix(dev->dev);
467 return r;
bfd99ff5
AK
468}
469
bfd99ff5
AK
470static int assigned_device_enable_guest_intx(struct kvm *kvm,
471 struct kvm_assigned_dev_kernel *dev,
472 struct kvm_assigned_irq *irq)
473{
474 dev->guest_irq = irq->guest_irq;
475 dev->ack_notifier.gsi = irq->guest_irq;
476 return 0;
477}
478
bfd99ff5
AK
479static int assigned_device_enable_guest_msi(struct kvm *kvm,
480 struct kvm_assigned_dev_kernel *dev,
481 struct kvm_assigned_irq *irq)
482{
483 dev->guest_irq = irq->guest_irq;
484 dev->ack_notifier.gsi = -1;
bfd99ff5
AK
485 return 0;
486}
bfd99ff5 487
bfd99ff5
AK
488static int assigned_device_enable_guest_msix(struct kvm *kvm,
489 struct kvm_assigned_dev_kernel *dev,
490 struct kvm_assigned_irq *irq)
491{
492 dev->guest_irq = irq->guest_irq;
493 dev->ack_notifier.gsi = -1;
bfd99ff5
AK
494 return 0;
495}
bfd99ff5
AK
496
497static int assign_host_irq(struct kvm *kvm,
498 struct kvm_assigned_dev_kernel *dev,
499 __u32 host_irq_type)
500{
501 int r = -EEXIST;
502
503 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
504 return r;
505
1e001d49
JK
506 snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
507 pci_name(dev->dev));
508
bfd99ff5
AK
509 switch (host_irq_type) {
510 case KVM_DEV_IRQ_HOST_INTX:
511 r = assigned_device_enable_host_intx(kvm, dev);
512 break;
bfd99ff5
AK
513 case KVM_DEV_IRQ_HOST_MSI:
514 r = assigned_device_enable_host_msi(kvm, dev);
515 break;
bfd99ff5
AK
516 case KVM_DEV_IRQ_HOST_MSIX:
517 r = assigned_device_enable_host_msix(kvm, dev);
518 break;
bfd99ff5
AK
519 default:
520 r = -EINVAL;
521 }
07700a94 522 dev->host_irq_disabled = false;
bfd99ff5
AK
523
524 if (!r)
525 dev->irq_requested_type |= host_irq_type;
526
527 return r;
528}
529
530static int assign_guest_irq(struct kvm *kvm,
531 struct kvm_assigned_dev_kernel *dev,
532 struct kvm_assigned_irq *irq,
533 unsigned long guest_irq_type)
534{
535 int id;
536 int r = -EEXIST;
537
538 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
539 return r;
540
541 id = kvm_request_irq_source_id(kvm);
542 if (id < 0)
543 return id;
544
545 dev->irq_source_id = id;
546
547 switch (guest_irq_type) {
548 case KVM_DEV_IRQ_GUEST_INTX:
549 r = assigned_device_enable_guest_intx(kvm, dev, irq);
550 break;
bfd99ff5
AK
551 case KVM_DEV_IRQ_GUEST_MSI:
552 r = assigned_device_enable_guest_msi(kvm, dev, irq);
553 break;
bfd99ff5
AK
554 case KVM_DEV_IRQ_GUEST_MSIX:
555 r = assigned_device_enable_guest_msix(kvm, dev, irq);
556 break;
bfd99ff5
AK
557 default:
558 r = -EINVAL;
559 }
560
561 if (!r) {
562 dev->irq_requested_type |= guest_irq_type;
c61fa9d6
JK
563 if (dev->ack_notifier.gsi != -1)
564 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
30d1e0e8 565 } else {
bfd99ff5 566 kvm_free_irq_source_id(kvm, dev->irq_source_id);
30d1e0e8
CG
567 dev->irq_source_id = -1;
568 }
bfd99ff5
AK
569
570 return r;
571}
572
573/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
574static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
575 struct kvm_assigned_irq *assigned_irq)
576{
577 int r = -EINVAL;
578 struct kvm_assigned_dev_kernel *match;
579 unsigned long host_irq_type, guest_irq_type;
580
bfd99ff5
AK
581 if (!irqchip_in_kernel(kvm))
582 return r;
583
584 mutex_lock(&kvm->lock);
585 r = -ENODEV;
586 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
587 assigned_irq->assigned_dev_id);
588 if (!match)
589 goto out;
590
591 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
592 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
593
594 r = -EINVAL;
595 /* can only assign one type at a time */
596 if (hweight_long(host_irq_type) > 1)
597 goto out;
598 if (hweight_long(guest_irq_type) > 1)
599 goto out;
600 if (host_irq_type == 0 && guest_irq_type == 0)
601 goto out;
602
603 r = 0;
604 if (host_irq_type)
605 r = assign_host_irq(kvm, match, host_irq_type);
606 if (r)
607 goto out;
608
609 if (guest_irq_type)
610 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
611out:
612 mutex_unlock(&kvm->lock);
613 return r;
614}
615
616static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
617 struct kvm_assigned_irq
618 *assigned_irq)
619{
620 int r = -ENODEV;
621 struct kvm_assigned_dev_kernel *match;
07700a94 622 unsigned long irq_type;
bfd99ff5
AK
623
624 mutex_lock(&kvm->lock);
625
626 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
627 assigned_irq->assigned_dev_id);
628 if (!match)
629 goto out;
630
07700a94
JK
631 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
632 KVM_DEV_IRQ_GUEST_MASK);
633 r = kvm_deassign_irq(kvm, match, irq_type);
bfd99ff5
AK
634out:
635 mutex_unlock(&kvm->lock);
636 return r;
637}
638
3d27e23b
AW
639/*
640 * We want to test whether the caller has been granted permissions to
641 * use this device. To be able to configure and control the device,
642 * the user needs access to PCI configuration space and BAR resources.
643 * These are accessed through PCI sysfs. PCI config space is often
644 * passed to the process calling this ioctl via file descriptor, so we
645 * can't rely on access to that file. We can check for permissions
646 * on each of the BAR resource files, which is a pretty clear
647 * indicator that the user has been granted access to the device.
648 */
649static int probe_sysfs_permissions(struct pci_dev *dev)
650{
651#ifdef CONFIG_SYSFS
652 int i;
653 bool bar_found = false;
654
655 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
656 char *kpath, *syspath;
657 struct path path;
658 struct inode *inode;
659 int r;
660
661 if (!pci_resource_len(dev, i))
662 continue;
663
664 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
665 if (!kpath)
666 return -ENOMEM;
667
668 /* Per sysfs-rules, sysfs is always at /sys */
669 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
670 kfree(kpath);
671 if (!syspath)
672 return -ENOMEM;
673
674 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
675 kfree(syspath);
676 if (r)
677 return r;
678
bb668734 679 inode = d_backing_inode(path.dentry);
3d27e23b
AW
680
681 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
682 path_put(&path);
683 if (r)
684 return r;
685
686 bar_found = true;
687 }
688
689 /* If no resources, probably something special */
690 if (!bar_found)
691 return -EPERM;
692
693 return 0;
694#else
695 return -EINVAL; /* No way to control the device without sysfs */
696#endif
697}
698
bfd99ff5
AK
699static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
700 struct kvm_assigned_pci_dev *assigned_dev)
701{
bc6678a3 702 int r = 0, idx;
bfd99ff5
AK
703 struct kvm_assigned_dev_kernel *match;
704 struct pci_dev *dev;
705
42387373
AW
706 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
707 return -EINVAL;
708
bfd99ff5 709 mutex_lock(&kvm->lock);
bc6678a3 710 idx = srcu_read_lock(&kvm->srcu);
bfd99ff5
AK
711
712 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
713 assigned_dev->assigned_dev_id);
714 if (match) {
715 /* device already assigned */
716 r = -EEXIST;
717 goto out;
718 }
719
720 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
721 if (match == NULL) {
722 printk(KERN_INFO "%s: Couldn't allocate memory\n",
723 __func__);
724 r = -ENOMEM;
725 goto out;
726 }
ab9f4ecb
ZE
727 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
728 assigned_dev->busnr,
bfd99ff5
AK
729 assigned_dev->devfn);
730 if (!dev) {
731 printk(KERN_INFO "%s: host device not found\n", __func__);
732 r = -EINVAL;
733 goto out_free;
734 }
3d27e23b
AW
735
736 /* Don't allow bridges to be assigned */
f961f728 737 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
3d27e23b
AW
738 r = -EPERM;
739 goto out_put;
740 }
741
742 r = probe_sysfs_permissions(dev);
743 if (r)
744 goto out_put;
745
bfd99ff5
AK
746 if (pci_enable_device(dev)) {
747 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
748 r = -EBUSY;
749 goto out_put;
750 }
751 r = pci_request_regions(dev, "kvm_assigned_device");
752 if (r) {
753 printk(KERN_INFO "%s: Could not get access to device regions\n",
754 __func__);
755 goto out_disable;
756 }
757
758 pci_reset_function(dev);
ed78661f 759 pci_save_state(dev);
f8fcfd77
AW
760 match->pci_saved_state = pci_store_saved_state(dev);
761 if (!match->pci_saved_state)
762 printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
763 __func__, dev_name(&dev->dev));
07700a94
JK
764
765 if (!pci_intx_mask_supported(dev))
766 assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
767
bfd99ff5 768 match->assigned_dev_id = assigned_dev->assigned_dev_id;
ab9f4ecb 769 match->host_segnr = assigned_dev->segnr;
bfd99ff5
AK
770 match->host_busnr = assigned_dev->busnr;
771 match->host_devfn = assigned_dev->devfn;
772 match->flags = assigned_dev->flags;
773 match->dev = dev;
0645211c 774 spin_lock_init(&match->intx_lock);
cf9eeac4 775 spin_lock_init(&match->intx_mask_lock);
bfd99ff5
AK
776 match->irq_source_id = -1;
777 match->kvm = kvm;
778 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
bfd99ff5
AK
779
780 list_add(&match->list, &kvm->arch.assigned_dev_head);
781
42387373
AW
782 if (!kvm->arch.iommu_domain) {
783 r = kvm_iommu_map_guest(kvm);
bfd99ff5
AK
784 if (r)
785 goto out_list_del;
786 }
c9eab58f 787 r = kvm_assign_device(kvm, match->dev);
42387373
AW
788 if (r)
789 goto out_list_del;
bfd99ff5
AK
790
791out:
bc6678a3 792 srcu_read_unlock(&kvm->srcu, idx);
fae3a353 793 mutex_unlock(&kvm->lock);
bfd99ff5
AK
794 return r;
795out_list_del:
f8fcfd77
AW
796 if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
797 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
798 __func__, dev_name(&dev->dev));
bfd99ff5
AK
799 list_del(&match->list);
800 pci_release_regions(dev);
801out_disable:
802 pci_disable_device(dev);
803out_put:
804 pci_dev_put(dev);
805out_free:
806 kfree(match);
bc6678a3 807 srcu_read_unlock(&kvm->srcu, idx);
fae3a353 808 mutex_unlock(&kvm->lock);
bfd99ff5
AK
809 return r;
810}
811
812static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
813 struct kvm_assigned_pci_dev *assigned_dev)
814{
815 int r = 0;
816 struct kvm_assigned_dev_kernel *match;
817
818 mutex_lock(&kvm->lock);
819
820 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
821 assigned_dev->assigned_dev_id);
822 if (!match) {
823 printk(KERN_INFO "%s: device hasn't been assigned before, "
824 "so cannot be deassigned\n", __func__);
825 r = -EINVAL;
826 goto out;
827 }
828
c9eab58f 829 kvm_deassign_device(kvm, match->dev);
bfd99ff5
AK
830
831 kvm_free_assigned_device(kvm, match);
832
833out:
834 mutex_unlock(&kvm->lock);
835 return r;
836}
837
838
bfd99ff5
AK
839static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
840 struct kvm_assigned_msix_nr *entry_nr)
841{
842 int r = 0;
843 struct kvm_assigned_dev_kernel *adev;
844
845 mutex_lock(&kvm->lock);
846
847 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
848 entry_nr->assigned_dev_id);
849 if (!adev) {
850 r = -EINVAL;
851 goto msix_nr_out;
852 }
853
854 if (adev->entries_nr == 0) {
855 adev->entries_nr = entry_nr->entry_nr;
856 if (adev->entries_nr == 0 ||
9f3191ae 857 adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
bfd99ff5
AK
858 r = -EINVAL;
859 goto msix_nr_out;
860 }
861
862 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
863 entry_nr->entry_nr,
864 GFP_KERNEL);
865 if (!adev->host_msix_entries) {
866 r = -ENOMEM;
867 goto msix_nr_out;
868 }
0645211c
JK
869 adev->guest_msix_entries =
870 kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
871 GFP_KERNEL);
bfd99ff5
AK
872 if (!adev->guest_msix_entries) {
873 kfree(adev->host_msix_entries);
874 r = -ENOMEM;
875 goto msix_nr_out;
876 }
877 } else /* Not allowed set MSI-X number twice */
878 r = -EINVAL;
879msix_nr_out:
880 mutex_unlock(&kvm->lock);
881 return r;
882}
883
884static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
885 struct kvm_assigned_msix_entry *entry)
886{
887 int r = 0, i;
888 struct kvm_assigned_dev_kernel *adev;
889
890 mutex_lock(&kvm->lock);
891
892 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
893 entry->assigned_dev_id);
894
895 if (!adev) {
896 r = -EINVAL;
897 goto msix_entry_out;
898 }
899
900 for (i = 0; i < adev->entries_nr; i++)
901 if (adev->guest_msix_entries[i].vector == 0 ||
902 adev->guest_msix_entries[i].entry == entry->entry) {
903 adev->guest_msix_entries[i].entry = entry->entry;
904 adev->guest_msix_entries[i].vector = entry->gsi;
905 adev->host_msix_entries[i].entry = entry->entry;
906 break;
907 }
908 if (i == adev->entries_nr) {
909 r = -ENOSPC;
910 goto msix_entry_out;
911 }
912
913msix_entry_out:
914 mutex_unlock(&kvm->lock);
915
916 return r;
917}
bfd99ff5 918
07700a94
JK
919static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
920 struct kvm_assigned_pci_dev *assigned_dev)
921{
922 int r = 0;
923 struct kvm_assigned_dev_kernel *match;
924
925 mutex_lock(&kvm->lock);
926
927 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
928 assigned_dev->assigned_dev_id);
929 if (!match) {
930 r = -ENODEV;
931 goto out;
932 }
933
cf9eeac4 934 spin_lock(&match->intx_mask_lock);
07700a94
JK
935
936 match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
937 match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
938
939 if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
940 if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
941 kvm_set_irq(match->kvm, match->irq_source_id,
aa2fbe6d 942 match->guest_irq, 0, false);
07700a94
JK
943 /*
944 * Masking at hardware-level is performed on demand,
945 * i.e. when an IRQ actually arrives at the host.
946 */
947 } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
948 /*
949 * Unmask the IRQ line if required. Unmasking at
950 * device level will be performed by user space.
951 */
952 spin_lock_irq(&match->intx_lock);
953 if (match->host_irq_disabled) {
954 enable_irq(match->host_irq);
955 match->host_irq_disabled = false;
956 }
957 spin_unlock_irq(&match->intx_lock);
958 }
959 }
960
cf9eeac4 961 spin_unlock(&match->intx_mask_lock);
07700a94
JK
962
963out:
964 mutex_unlock(&kvm->lock);
965 return r;
966}
967
bfd99ff5
AK
968long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
969 unsigned long arg)
970{
971 void __user *argp = (void __user *)arg;
51de271d 972 int r;
bfd99ff5
AK
973
974 switch (ioctl) {
975 case KVM_ASSIGN_PCI_DEVICE: {
976 struct kvm_assigned_pci_dev assigned_dev;
977
978 r = -EFAULT;
979 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
980 goto out;
981 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
982 if (r)
983 goto out;
984 break;
985 }
986 case KVM_ASSIGN_IRQ: {
987 r = -EOPNOTSUPP;
988 break;
989 }
bfd99ff5
AK
990 case KVM_ASSIGN_DEV_IRQ: {
991 struct kvm_assigned_irq assigned_irq;
992
993 r = -EFAULT;
994 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
995 goto out;
996 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
997 if (r)
998 goto out;
999 break;
1000 }
1001 case KVM_DEASSIGN_DEV_IRQ: {
1002 struct kvm_assigned_irq assigned_irq;
1003
1004 r = -EFAULT;
1005 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
1006 goto out;
1007 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
1008 if (r)
1009 goto out;
1010 break;
1011 }
bfd99ff5
AK
1012 case KVM_DEASSIGN_PCI_DEVICE: {
1013 struct kvm_assigned_pci_dev assigned_dev;
1014
1015 r = -EFAULT;
1016 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1017 goto out;
1018 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
1019 if (r)
1020 goto out;
1021 break;
1022 }
bfd99ff5
AK
1023 case KVM_ASSIGN_SET_MSIX_NR: {
1024 struct kvm_assigned_msix_nr entry_nr;
1025 r = -EFAULT;
1026 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
1027 goto out;
1028 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
1029 if (r)
1030 goto out;
1031 break;
1032 }
1033 case KVM_ASSIGN_SET_MSIX_ENTRY: {
1034 struct kvm_assigned_msix_entry entry;
1035 r = -EFAULT;
1036 if (copy_from_user(&entry, argp, sizeof entry))
1037 goto out;
1038 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
1039 if (r)
1040 goto out;
1041 break;
1042 }
07700a94
JK
1043 case KVM_ASSIGN_SET_INTX_MASK: {
1044 struct kvm_assigned_pci_dev assigned_dev;
1045
1046 r = -EFAULT;
1047 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1048 goto out;
1049 r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
1050 break;
1051 }
51de271d
JK
1052 default:
1053 r = -ENOTTY;
1054 break;
bfd99ff5
AK
1055 }
1056out:
1057 return r;
1058}