/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"
#include "assigned-dev.h"

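/*
 * Per-device bookkeeping for one host PCI device assigned to a guest;
 * instances are linked on kvm->arch.assigned_dev_head.
 */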
struct kvm_assigned_dev_kernel {
        struct kvm_irq_ack_notifier ack_notifier;
        struct list_head list;
        int assigned_dev_id;
        int host_segnr;
        int host_busnr;
        int host_devfn;
        unsigned int entries_nr;
        int host_irq;
        bool host_irq_disabled;
        bool pci_2_3;
        struct msix_entry *host_msix_entries;
        int guest_irq;
        struct msix_entry *guest_msix_entries;
        unsigned long irq_requested_type;
        int irq_source_id;
        int flags;
        struct pci_dev *dev;
        struct kvm *kvm;
        spinlock_t intx_lock;
        spinlock_t intx_mask_lock;
        char irq_name[32];
        struct pci_saved_state *pci_saved_state;
};

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                             int assigned_dev_id)
{
        struct list_head *ptr;
        struct kvm_assigned_dev_kernel *match;

        list_for_each(ptr, head) {
                match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
                if (match->assigned_dev_id == assigned_dev_id)
                        return match;
        }
        return NULL;
}

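/*
 * Map a host IRQ number back to its index in the device's MSI-X entry
 * table; returns -1 if the IRQ does not belong to this device.
 */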
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
                                    *assigned_dev, int irq)
{
        int i, index;
        struct msix_entry *host_msix_entries;

        host_msix_entries = assigned_dev->host_msix_entries;

        index = -1;
        for (i = 0; i < assigned_dev->entries_nr; i++)
                if (irq == host_msix_entries[i].vector) {
                        index = i;
                        break;
                }
        if (index < 0)
                printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

        return index;
}

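/*
 * Hard IRQ handler for INTx on PCI 2.3 capable devices: mask the
 * interrupt at device level and defer guest injection to the thread.
 */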
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret;

        spin_lock(&assigned_dev->intx_lock);
        if (pci_check_and_mask_intx(assigned_dev->dev)) {
                assigned_dev->host_irq_disabled = true;
                ret = IRQ_WAKE_THREAD;
        } else
                ret = IRQ_NONE;
        spin_unlock(&assigned_dev->intx_lock);

        return ret;
}

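/*
 * Inject the interrupt into the guest. For INTx, injection is skipped
 * while user space has the line masked via KVM_DEV_ASSIGN_MASK_INTX.
 */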
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
                                 int vector)
{
        if (unlikely(assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_GUEST_INTX)) {
                spin_lock(&assigned_dev->intx_mask_lock);
                if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
                        kvm_set_irq(assigned_dev->kvm,
                                    assigned_dev->irq_source_id, vector, 1,
                                    false);
                spin_unlock(&assigned_dev->intx_mask_lock);
        } else
                kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                            vector, 1, false);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                spin_lock_irq(&assigned_dev->intx_lock);
                disable_irq_nosync(irq);
                assigned_dev->host_irq_disabled = true;
                spin_unlock_irq(&assigned_dev->intx_lock);
        }

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}

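/*
 * MSI: try atomic injection first and only wake the threaded handler
 * if the atomic path would have blocked.
 */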
#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                       assigned_dev->irq_source_id,
                                       assigned_dev->guest_irq, 1);
        return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#endif

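/*
 * MSI-X: each host vector is first mapped back to its table index to
 * find the guest vector configured for that entry.
 */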
#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;
        int ret = 0;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                ret = kvm_set_irq_inatomic(assigned_dev->kvm,
                                           assigned_dev->irq_source_id,
                                           vector, 1);
        }

        return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
        }

        return IRQ_HANDLED;
}
#endif

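/*
 * On guest ack, deassert the guest line and re-enable (or unmask) the
 * host IRQ that the hard IRQ handlers above disabled.
 */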
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_assigned_dev_kernel *dev =
                container_of(kian, struct kvm_assigned_dev_kernel,
                             ack_notifier);

        kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);

        spin_lock(&dev->intx_mask_lock);

        if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
                bool reassert = false;

                spin_lock_irq(&dev->intx_lock);
                /*
                 * The guest IRQ may be shared so this ack can come from an
                 * IRQ for another guest device.
                 */
                if (dev->host_irq_disabled) {
                        if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
                                enable_irq(dev->host_irq);
                        else if (!pci_check_and_unmask_intx(dev->dev))
                                reassert = true;
                        dev->host_irq_disabled = reassert;
                }
                spin_unlock_irq(&dev->intx_lock);

                if (reassert)
                        kvm_set_irq(dev->kvm, dev->irq_source_id,
                                    dev->guest_irq, 1, false);
        }

        spin_unlock(&dev->intx_mask_lock);
}

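/*
 * Tear down the guest side of an assignment: unregister the ack
 * notifier, deassert the line, and release the IRQ source ID.
 */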
static void deassign_guest_irq(struct kvm *kvm,
                               struct kvm_assigned_dev_kernel *assigned_dev)
{
        if (assigned_dev->ack_notifier.gsi != -1)
                kvm_unregister_irq_ack_notifier(kvm,
                                                &assigned_dev->ack_notifier);

        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 0, false);

        if (assigned_dev->irq_source_id != -1)
                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
        assigned_dev->irq_source_id = -1;
        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
                              struct kvm_assigned_dev_kernel *assigned_dev)
{
        /*
         * We disable the IRQ here to prevent further events.
         *
         * Note that this may result in a nested disable if the interrupt
         * type is INTx, but that is OK since we are going to free it.
         *
         * If this function is part of VM destruction, ensure that the kvm
         * state is still valid at this point, since we may also have to
         * wait on a currently running IRQ handler.
         */
        if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
                int i;
                for (i = 0; i < assigned_dev->entries_nr; i++)
                        disable_irq(assigned_dev->host_msix_entries[i].vector);

                for (i = 0; i < assigned_dev->entries_nr; i++)
                        free_irq(assigned_dev->host_msix_entries[i].vector,
                                 assigned_dev);

                assigned_dev->entries_nr = 0;
                kfree(assigned_dev->host_msix_entries);
                kfree(assigned_dev->guest_msix_entries);
                pci_disable_msix(assigned_dev->dev);
        } else {
                /* Deal with MSI and INTx */
                if ((assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_HOST_INTX) &&
                    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        spin_lock_irq(&assigned_dev->intx_lock);
                        pci_intx(assigned_dev->dev, false);
                        spin_unlock_irq(&assigned_dev->intx_lock);
                        synchronize_irq(assigned_dev->host_irq);
                } else
                        disable_irq(assigned_dev->host_irq);

                free_irq(assigned_dev->host_irq, assigned_dev);

                if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
                        pci_disable_msi(assigned_dev->dev);
        }

        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

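/*
 * Deassign the host and/or guest half of an IRQ assignment as selected
 * by irq_requested_type; the host side is torn down first.
 */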
static int kvm_deassign_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *assigned_dev,
                            unsigned long irq_requested_type)
{
        unsigned long guest_irq_type, host_irq_type;

        if (!irqchip_in_kernel(kvm))
                return -EINVAL;
        /* no irq assignment to deassign */
        if (!assigned_dev->irq_requested_type)
                return -ENXIO;

        host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
        guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

        if (host_irq_type)
                deassign_host_irq(kvm, assigned_dev);
        if (guest_irq_type)
                deassign_guest_irq(kvm, assigned_dev);

        return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
{
        kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel
                                     *assigned_dev)
{
        kvm_free_assigned_irq(kvm, assigned_dev);

        pci_reset_function(assigned_dev->dev);
        if (pci_load_and_free_saved_state(assigned_dev->dev,
                                          &assigned_dev->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&assigned_dev->dev->dev));
        else
                pci_restore_state(assigned_dev->dev);

        pci_clear_dev_assigned(assigned_dev->dev);

        pci_release_regions(assigned_dev->dev);
        pci_disable_device(assigned_dev->dev);
        pci_dev_put(assigned_dev->dev);

        list_del(&assigned_dev->list);
        kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
        struct list_head *ptr, *ptr2;
        struct kvm_assigned_dev_kernel *assigned_dev;

        list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
                assigned_dev = list_entry(ptr,
                                          struct kvm_assigned_dev_kernel,
                                          list);

                kvm_free_assigned_device(kvm, assigned_dev);
        }
}

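/*
 * Request the host INTx line. With PCI 2.3 masking the line can be
 * shared (IRQF_SHARED); otherwise it must stay masked until the
 * threaded handler has run (IRQF_ONESHOT).
 */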
static int assigned_device_enable_host_intx(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        irq_handler_t irq_handler;
        unsigned long flags;

        dev->host_irq = dev->dev->irq;

        /*
         * We can only share the IRQ line with other host devices if we are
         * able to disable the IRQ source at device-level - independently of
         * the guest driver. Otherwise host devices may suffer from unbounded
         * IRQ latencies when the guest keeps the line asserted.
         */
        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                irq_handler = kvm_assigned_dev_intx;
                flags = IRQF_SHARED;
        } else {
                irq_handler = NULL;
                flags = IRQF_ONESHOT;
        }
        if (request_threaded_irq(dev->host_irq, irq_handler,
                                 kvm_assigned_dev_thread_intx, flags,
                                 dev->irq_name, dev))
                return -EIO;

        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                spin_lock_irq(&dev->intx_lock);
                pci_intx(dev->dev, true);
                spin_unlock_irq(&dev->intx_lock);
        }
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
{
        int r;

        if (!dev->dev->msi_enabled) {
                r = pci_enable_msi(dev->dev);
                if (r)
                        return r;
        }

        dev->host_irq = dev->dev->irq;
        if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
                                 kvm_assigned_dev_thread_msi, 0,
                                 dev->irq_name, dev)) {
                pci_disable_msi(dev->dev);
                return -EIO;
        }

        return 0;
}
#endif

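/*
 * Enable MSI-X on the host device and request one threaded IRQ per
 * vector, rolling back already-requested vectors on failure.
 */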
#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        int i, r = -EINVAL;

        /* host_msix_entries and guest_msix_entries should have been
         * initialized */
        if (dev->entries_nr == 0)
                return r;

        r = pci_enable_msix_exact(dev->dev,
                                  dev->host_msix_entries, dev->entries_nr);
        if (r)
                return r;

        for (i = 0; i < dev->entries_nr; i++) {
                r = request_threaded_irq(dev->host_msix_entries[i].vector,
                                         kvm_assigned_dev_msix,
                                         kvm_assigned_dev_thread_msix,
                                         0, dev->irq_name, dev);
                if (r)
                        goto err;
        }

        return 0;
err:
        for (i -= 1; i >= 0; i--)
                free_irq(dev->host_msix_entries[i].vector, dev);
        pci_disable_msix(dev->dev);
        return r;
}

#endif

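/*
 * The guest-side enable helpers only record the guest vector and the
 * GSI to ack-notify on; MSI and MSI-X use no ack notifier (gsi == -1).
 */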
static int assigned_device_enable_guest_intx(struct kvm *kvm,
                                struct kvm_assigned_dev_kernel *dev,
                                struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = irq->guest_irq;
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

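/*
 * Dispatch to the host-side enable helper for the requested IRQ type;
 * only one host IRQ type may be active at a time.
 */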
static int assign_host_irq(struct kvm *kvm,
                           struct kvm_assigned_dev_kernel *dev,
                           __u32 host_irq_type)
{
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
                return r;

        snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
                 pci_name(dev->dev));

        switch (host_irq_type) {
        case KVM_DEV_IRQ_HOST_INTX:
                r = assigned_device_enable_host_intx(kvm, dev);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_HOST_MSI:
                r = assigned_device_enable_host_msi(kvm, dev);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_HOST_MSIX:
                r = assigned_device_enable_host_msix(kvm, dev);
                break;
#endif
        default:
                r = -EINVAL;
        }
        dev->host_irq_disabled = false;

        if (!r)
                dev->irq_requested_type |= host_irq_type;

        return r;
}

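/*
 * Allocate an IRQ source ID and wire up the guest side; the ack
 * notifier is registered only for INTx, where the GSI is valid.
 */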
static int assign_guest_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *dev,
                            struct kvm_assigned_irq *irq,
                            unsigned long guest_irq_type)
{
        int id;
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
                return r;

        id = kvm_request_irq_source_id(kvm);
        if (id < 0)
                return id;

        dev->irq_source_id = id;

        switch (guest_irq_type) {
        case KVM_DEV_IRQ_GUEST_INTX:
                r = assigned_device_enable_guest_intx(kvm, dev, irq);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_GUEST_MSI:
                r = assigned_device_enable_guest_msi(kvm, dev, irq);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_GUEST_MSIX:
                r = assigned_device_enable_guest_msix(kvm, dev, irq);
                break;
#endif
        default:
                r = -EINVAL;
        }

        if (!r) {
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
        } else {
                kvm_free_irq_source_id(kvm, dev->irq_source_id);
                dev->irq_source_id = -1;
        }

        return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                   struct kvm_assigned_irq *assigned_irq)
{
        int r = -EINVAL;
        struct kvm_assigned_dev_kernel *match;
        unsigned long host_irq_type, guest_irq_type;

        if (!irqchip_in_kernel(kvm))
                return r;

        mutex_lock(&kvm->lock);
        r = -ENODEV;
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
        guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

        r = -EINVAL;
        /* can only assign one type at a time */
        if (hweight_long(host_irq_type) > 1)
                goto out;
        if (hweight_long(guest_irq_type) > 1)
                goto out;
        if (host_irq_type == 0 && guest_irq_type == 0)
                goto out;

        r = 0;
        if (host_irq_type)
                r = assign_host_irq(kvm, match, host_irq_type);
        if (r)
                goto out;

        if (guest_irq_type)
                r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
                                         struct kvm_assigned_irq
                                         *assigned_irq)
{
        int r = -ENODEV;
        struct kvm_assigned_dev_kernel *match;
        unsigned long irq_type;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
                                          KVM_DEV_IRQ_GUEST_MASK);
        r = kvm_deassign_irq(kvm, match, irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device. To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs. PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file. We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
        int i;
        bool bar_found = false;

        for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
                char *kpath, *syspath;
                struct path path;
                struct inode *inode;
                int r;

                if (!pci_resource_len(dev, i))
                        continue;

                kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
                if (!kpath)
                        return -ENOMEM;

                /* Per sysfs-rules, sysfs is always at /sys */
                syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
                kfree(kpath);
                if (!syspath)
                        return -ENOMEM;

                r = kern_path(syspath, LOOKUP_FOLLOW, &path);
                kfree(syspath);
                if (r)
                        return r;

                inode = d_backing_inode(path.dentry);

                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
                if (r)
                        return r;

                bar_found = true;
        }

        /* If no resources, probably something special */
        if (!bar_found)
                return -EPERM;

        return 0;
#else
        return -EINVAL; /* No way to control the device without sysfs */
#endif
}

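/*
 * Main assignment path: look up the PCI device, verify the caller's
 * sysfs permissions, claim the device, snapshot its config space, and
 * attach it to the VM's IOMMU domain.
 */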
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                      struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0, idx;
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        idx = srcu_read_lock(&kvm->srcu);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (match) {
                /* device already assigned */
                r = -EEXIST;
                goto out;
        }

        match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
        if (match == NULL) {
                printk(KERN_INFO "%s: Couldn't allocate memory\n",
                       __func__);
                r = -ENOMEM;
                goto out;
        }
        dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
                                          assigned_dev->busnr,
                                          assigned_dev->devfn);
        if (!dev) {
                printk(KERN_INFO "%s: host device not found\n", __func__);
                r = -EINVAL;
                goto out_free;
        }

        /* Don't allow bridges to be assigned */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
                r = -EPERM;
                goto out_put;
        }

        r = probe_sysfs_permissions(dev);
        if (r)
                goto out_put;

        if (pci_enable_device(dev)) {
                printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
                r = -EBUSY;
                goto out_put;
        }
        r = pci_request_regions(dev, "kvm_assigned_device");
        if (r) {
                printk(KERN_INFO "%s: Could not get access to device regions\n",
                       __func__);
                goto out_disable;
        }

        pci_reset_function(dev);
        pci_save_state(dev);
        match->pci_saved_state = pci_store_saved_state(dev);
        if (!match->pci_saved_state)
                printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
                       __func__, dev_name(&dev->dev));

        if (!pci_intx_mask_supported(dev))
                assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

        match->assigned_dev_id = assigned_dev->assigned_dev_id;
        match->host_segnr = assigned_dev->segnr;
        match->host_busnr = assigned_dev->busnr;
        match->host_devfn = assigned_dev->devfn;
        match->flags = assigned_dev->flags;
        match->dev = dev;
        spin_lock_init(&match->intx_lock);
        spin_lock_init(&match->intx_mask_lock);
        match->irq_source_id = -1;
        match->kvm = kvm;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

        list_add(&match->list, &kvm->arch.assigned_dev_head);

        if (!kvm->arch.iommu_domain) {
                r = kvm_iommu_map_guest(kvm);
                if (r)
                        goto out_list_del;
        }
        r = kvm_assign_device(kvm, match->dev);
        if (r)
                goto out_list_del;

out:
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
out_list_del:
        if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&dev->dev));
        list_del(&match->list);
        pci_release_regions(dev);
out_disable:
        pci_disable_device(dev);
out_put:
        pci_dev_put(dev);
out_free:
        kfree(match);
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                printk(KERN_INFO "%s: device hasn't been assigned before, "
                       "so cannot be deassigned\n", __func__);
                r = -EINVAL;
                goto out;
        }

        kvm_deassign_device(kvm, match->dev);

        kvm_free_assigned_device(kvm, match);

out:
        mutex_unlock(&kvm->lock);
        return r;
}


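/*
 * MSI-X setup is a two-step ioctl sequence: user space first fixes the
 * number of entries, then fills in each entry's mapping from MSI-X
 * table entry to guest GSI.
 */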
#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
                                    struct kvm_assigned_msix_nr *entry_nr)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry_nr->assigned_dev_id);
        if (!adev) {
                r = -EINVAL;
                goto msix_nr_out;
        }

        if (adev->entries_nr == 0) {
                adev->entries_nr = entry_nr->entry_nr;
                if (adev->entries_nr == 0 ||
                    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
                        r = -EINVAL;
                        goto msix_nr_out;
                }

                adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
                                                  entry_nr->entry_nr,
                                                  GFP_KERNEL);
                if (!adev->host_msix_entries) {
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
                adev->guest_msix_entries =
                        kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
                                GFP_KERNEL);
                if (!adev->guest_msix_entries) {
                        kfree(adev->host_msix_entries);
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
        } else /* Not allowed to set the MSI-X number twice */
                r = -EINVAL;
msix_nr_out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                       struct kvm_assigned_msix_entry *entry)
{
        int r = 0, i;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                     entry->assigned_dev_id);

        if (!adev) {
                r = -EINVAL;
                goto msix_entry_out;
        }

        for (i = 0; i < adev->entries_nr; i++)
                if (adev->guest_msix_entries[i].vector == 0 ||
                    adev->guest_msix_entries[i].entry == entry->entry) {
                        adev->guest_msix_entries[i].entry = entry->entry;
                        adev->guest_msix_entries[i].vector = entry->gsi;
                        adev->host_msix_entries[i].entry = entry->entry;
                        break;
                }
        if (i == adev->entries_nr) {
                r = -ENOSPC;
                goto msix_entry_out;
        }

msix_entry_out:
        mutex_unlock(&kvm->lock);

        return r;
}
#endif

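/*
 * KVM_ASSIGN_SET_INTX_MASK: toggle the virtual INTx mask. Masking
 * suppresses injection into the guest; unmasking may require
 * re-enabling a host IRQ that was disabled while the line was masked.
 */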
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
                                         struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                r = -ENODEV;
                goto out;
        }

        spin_lock(&match->intx_mask_lock);

        match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
        match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

        if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
                if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
                        kvm_set_irq(match->kvm, match->irq_source_id,
                                    match->guest_irq, 0, false);
                        /*
                         * Masking at hardware-level is performed on demand,
                         * i.e. when an IRQ actually arrives at the host.
                         */
                } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        /*
                         * Unmask the IRQ line if required. Unmasking at
                         * device level will be performed by user space.
                         */
                        spin_lock_irq(&match->intx_lock);
                        if (match->host_irq_disabled) {
                                enable_irq(match->host_irq);
                                match->host_irq_disabled = false;
                        }
                        spin_unlock_irq(&match->intx_lock);
                }
        }

        spin_unlock(&match->intx_mask_lock);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

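/*
 * Top-level dispatcher for the device assignment ioctls; each case
 * copies its argument struct from user space and calls the matching
 * helper above.
 */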
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
                                  unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_ASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_IRQ: {
                r = -EOPNOTSUPP;
                break;
        }
        case KVM_ASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
#ifdef __KVM_HAVE_MSIX
        case KVM_ASSIGN_SET_MSIX_NR: {
                struct kvm_assigned_msix_nr entry_nr;
                r = -EFAULT;
                if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
                        goto out;
                r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_SET_MSIX_ENTRY: {
                struct kvm_assigned_msix_entry entry;
                r = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof entry))
                        goto out;
                r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
                if (r)
                        goto out;
                break;
        }
#endif
        case KVM_ASSIGN_SET_INTX_MASK: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
                break;
        }
        default:
                r = -ENOTTY;
                break;
        }
out:
        return r;
}