KVM: introduce kvm_arch functions for IRQ bypass
virt/kvm/eventfd.c
1 /*
2  * kvm eventfd support - use eventfd objects to signal various KVM events
3  *
4  * Copyright 2009 Novell.  All Rights Reserved.
5  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
6  *
7  * Author:
8  *      Gregory Haskins <ghaskins@novell.com>
9  *
10  * This file is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License
12  * as published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software Foundation,
21  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
22  */
23
24 #include <linux/kvm_host.h>
25 #include <linux/kvm.h>
26 #include <linux/kvm_irqfd.h>
27 #include <linux/workqueue.h>
28 #include <linux/syscalls.h>
29 #include <linux/wait.h>
30 #include <linux/poll.h>
31 #include <linux/file.h>
32 #include <linux/list.h>
33 #include <linux/eventfd.h>
34 #include <linux/kernel.h>
35 #include <linux/srcu.h>
36 #include <linux/slab.h>
37 #include <linux/seqlock.h>
38 #include <trace/events/kvm.h>
39
40 #include <kvm/iodev.h>
41
42 #ifdef CONFIG_HAVE_KVM_IRQFD
43
44 static struct workqueue_struct *irqfd_cleanup_wq;
45
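/*
 * Work item: inject the interrupt behind a triggered irqfd.  Ordinary irqfds
 * get an edge-style assert/de-assert pulse; resampler irqfds are only
 * asserted here and are de-asserted later from irqfd_resampler_ack() once
 * the guest acknowledges the interrupt.
 */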
46 static void
47 irqfd_inject(struct work_struct *work)
48 {
49         struct kvm_kernel_irqfd *irqfd =
50                 container_of(work, struct kvm_kernel_irqfd, inject);
51         struct kvm *kvm = irqfd->kvm;
52
53         if (!irqfd->resampler) {
54                 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
55                                 false);
56                 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
57                                 false);
58         } else
59                 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
60                             irqfd->gsi, 1, false);
61 }
62
63 /*
64  * Since resampler irqfds share an IRQ source ID, we de-assert once,
65  * then notify all of the resampler irqfds using this GSI.  We can't
66  * do multiple de-asserts or we risk racing with incoming re-asserts.
67  */
68 static void
69 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
70 {
71         struct kvm_kernel_irqfd_resampler *resampler;
72         struct kvm *kvm;
73         struct kvm_kernel_irqfd *irqfd;
74         int idx;
75
76         resampler = container_of(kian,
77                         struct kvm_kernel_irqfd_resampler, notifier);
78         kvm = resampler->kvm;
79
80         kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
81                     resampler->notifier.gsi, 0, false);
82
83         idx = srcu_read_lock(&kvm->irq_srcu);
84
85         list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
86                 eventfd_signal(irqfd->resamplefd, 1);
87
88         srcu_read_unlock(&kvm->irq_srcu, idx);
89 }
90
91 static void
92 irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
93 {
94         struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
95         struct kvm *kvm = resampler->kvm;
96
97         mutex_lock(&kvm->irqfds.resampler_lock);
98
99         list_del_rcu(&irqfd->resampler_link);
100         synchronize_srcu(&kvm->irq_srcu);
101
102         if (list_empty(&resampler->list)) {
103                 list_del(&resampler->link);
104                 kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
105                 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
106                             resampler->notifier.gsi, 0, false);
107                 kfree(resampler);
108         }
109
110         mutex_unlock(&kvm->irqfds.resampler_lock);
111 }
112
113 /*
114  * Race-free decouple logic (ordering is critical)
115  */
116 static void
117 irqfd_shutdown(struct work_struct *work)
118 {
119         struct kvm_kernel_irqfd *irqfd =
120                 container_of(work, struct kvm_kernel_irqfd, shutdown);
121         u64 cnt;
122
123         /*
124          * Synchronize with the wait-queue and unhook ourselves to prevent
125          * further events.
126          */
127         eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
128
129         /*
130          * We know no new events will be scheduled at this point, so block
131          * until all previously outstanding events have completed
132          */
133         flush_work(&irqfd->inject);
134
135         if (irqfd->resampler) {
136                 irqfd_resampler_shutdown(irqfd);
137                 eventfd_ctx_put(irqfd->resamplefd);
138         }
139
140         /*
141          * It is now safe to release the object's resources
142          */
143         eventfd_ctx_put(irqfd->eventfd);
144         kfree(irqfd);
145 }
146
147
148 /* assumes kvm->irqfds.lock is held */
149 static bool
150 irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
151 {
152         return !list_empty(&irqfd->list);
153 }
154
155 /*
156  * Mark the irqfd as inactive and schedule it for removal
157  *
158  * assumes kvm->irqfds.lock is held
159  */
160 static void
161 irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
162 {
163         BUG_ON(!irqfd_is_active(irqfd));
164
165         list_del_init(&irqfd->list);
166
167         queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
168 }
169
170 /*
171  * Called with wqh->lock held and interrupts disabled
172  */
173 static int
174 irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
175 {
176         struct kvm_kernel_irqfd *irqfd =
177                 container_of(wait, struct kvm_kernel_irqfd, wait);
178         unsigned long flags = (unsigned long)key;
179         struct kvm_kernel_irq_routing_entry irq;
180         struct kvm *kvm = irqfd->kvm;
181         unsigned seq;
182         int idx;
183
184         if (flags & POLLIN) {
185                 idx = srcu_read_lock(&kvm->irq_srcu);
186                 do {
187                         seq = read_seqcount_begin(&irqfd->irq_entry_sc);
188                         irq = irqfd->irq_entry;
189                 } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
190                 /* An event has been signaled; inject an interrupt */
191                 if (irq.type == KVM_IRQ_ROUTING_MSI)
192                         kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
193                                         false);
194                 else
195                         schedule_work(&irqfd->inject);
196                 srcu_read_unlock(&kvm->irq_srcu, idx);
197         }
198
199         if (flags & POLLHUP) {
200                 /* The eventfd is closing, detach from KVM */
201                 unsigned long flags;
202
203                 spin_lock_irqsave(&kvm->irqfds.lock, flags);
204
205                 /*
206                  * We must check if someone deactivated the irqfd before
207                  * we could acquire the irqfds.lock since the item is
208                  * deactivated from the KVM side before it is unhooked from
209                  * the wait-queue.  If it is already deactivated, we can
210                  * simply return knowing the other side will cleanup for us.
211                  * We cannot race against the irqfd going away since the
212                  * other side is required to acquire wqh->lock, which we hold
213                  */
214                 if (irqfd_is_active(irqfd))
215                         irqfd_deactivate(irqfd);
216
217                 spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
218         }
219
220         return 0;
221 }
222
223 static void
224 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
225                         poll_table *pt)
226 {
227         struct kvm_kernel_irqfd *irqfd =
228                 container_of(pt, struct kvm_kernel_irqfd, pt);
229         add_wait_queue(wqh, &irqfd->wait);
230 }
231
232 /* Must be called under irqfds.lock */
233 static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
234 {
235         struct kvm_kernel_irq_routing_entry *e;
236         struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
237         int i, n_entries;
238
239         n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
240
241         write_seqcount_begin(&irqfd->irq_entry_sc);
242
243         irqfd->irq_entry.type = 0;
244
245         e = entries;
246         for (i = 0; i < n_entries; ++i, ++e) {
247                 /* Only fast-path MSI. */
248                 if (e->type == KVM_IRQ_ROUTING_MSI)
249                         irqfd->irq_entry = *e;
250         }
251
252         write_seqcount_end(&irqfd->irq_entry_sc);
253 }
254
255 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
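/*
 * Weak no-op defaults for the arch IRQ bypass hooks; an architecture that
 * implements IRQ bypass provides its own non-weak definitions, which take
 * precedence at link time.
 */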
256 void __attribute__((weak)) kvm_arch_irq_bypass_stop(
257                                 struct irq_bypass_consumer *cons)
258 {
259 }
260
261 void __attribute__((weak)) kvm_arch_irq_bypass_start(
262                                 struct irq_bypass_consumer *cons)
263 {
264 }
265 #endif
266
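/*
 * Wire up a new irqfd: resolve the eventfd, optionally join or create the
 * resampler for this GSI, hook our wake-up callback into the eventfd's
 * wait queue, and snapshot the current routing entry for the fast path.
 */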
267 static int
268 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
269 {
270         struct kvm_kernel_irqfd *irqfd, *tmp;
271         struct fd f;
272         struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
273         int ret;
274         unsigned int events;
275         int idx;
276
277         if (!kvm_arch_intc_initialized(kvm))
278                 return -EAGAIN;
279
280         irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
281         if (!irqfd)
282                 return -ENOMEM;
283
284         irqfd->kvm = kvm;
285         irqfd->gsi = args->gsi;
286         INIT_LIST_HEAD(&irqfd->list);
287         INIT_WORK(&irqfd->inject, irqfd_inject);
288         INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
289         seqcount_init(&irqfd->irq_entry_sc);
290
291         f = fdget(args->fd);
292         if (!f.file) {
293                 ret = -EBADF;
294                 goto out;
295         }
296
297         eventfd = eventfd_ctx_fileget(f.file);
298         if (IS_ERR(eventfd)) {
299                 ret = PTR_ERR(eventfd);
300                 goto fail;
301         }
302
303         irqfd->eventfd = eventfd;
304
305         if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
306                 struct kvm_kernel_irqfd_resampler *resampler;
307
308                 resamplefd = eventfd_ctx_fdget(args->resamplefd);
309                 if (IS_ERR(resamplefd)) {
310                         ret = PTR_ERR(resamplefd);
311                         goto fail;
312                 }
313
314                 irqfd->resamplefd = resamplefd;
315                 INIT_LIST_HEAD(&irqfd->resampler_link);
316
317                 mutex_lock(&kvm->irqfds.resampler_lock);
318
319                 list_for_each_entry(resampler,
320                                     &kvm->irqfds.resampler_list, link) {
321                         if (resampler->notifier.gsi == irqfd->gsi) {
322                                 irqfd->resampler = resampler;
323                                 break;
324                         }
325                 }
326
327                 if (!irqfd->resampler) {
328                         resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
329                         if (!resampler) {
330                                 ret = -ENOMEM;
331                                 mutex_unlock(&kvm->irqfds.resampler_lock);
332                                 goto fail;
333                         }
334
335                         resampler->kvm = kvm;
336                         INIT_LIST_HEAD(&resampler->list);
337                         resampler->notifier.gsi = irqfd->gsi;
338                         resampler->notifier.irq_acked = irqfd_resampler_ack;
339                         INIT_LIST_HEAD(&resampler->link);
340
341                         list_add(&resampler->link, &kvm->irqfds.resampler_list);
342                         kvm_register_irq_ack_notifier(kvm,
343                                                       &resampler->notifier);
344                         irqfd->resampler = resampler;
345                 }
346
347                 list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
348                 synchronize_srcu(&kvm->irq_srcu);
349
350                 mutex_unlock(&kvm->irqfds.resampler_lock);
351         }
352
353         /*
354          * Install our own custom wake-up handling so we are notified via
355          * a callback whenever someone signals the underlying eventfd
356          */
357         init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
358         init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
359
360         spin_lock_irq(&kvm->irqfds.lock);
361
362         ret = 0;
363         list_for_each_entry(tmp, &kvm->irqfds.items, list) {
364                 if (irqfd->eventfd != tmp->eventfd)
365                         continue;
366                 /* This fd is used for another irq already. */
367                 ret = -EBUSY;
368                 spin_unlock_irq(&kvm->irqfds.lock);
369                 goto fail;
370         }
371
372         idx = srcu_read_lock(&kvm->irq_srcu);
373         irqfd_update(kvm, irqfd);
374         srcu_read_unlock(&kvm->irq_srcu, idx);
375
376         list_add_tail(&irqfd->list, &kvm->irqfds.items);
377
378         spin_unlock_irq(&kvm->irqfds.lock);
379
380         /*
381          * Check if there was an event already pending on the eventfd
382          * before we registered, and trigger it as if we didn't miss it.
383          */
384         events = f.file->f_op->poll(f.file, &irqfd->pt);
385
386         if (events & POLLIN)
387                 schedule_work(&irqfd->inject);
388
389         /*
390          * Do not drop the file until the irqfd is fully initialized;
391          * otherwise we might race against the POLLHUP.
392          */
393         fdput(f);
394
395         return 0;
396
397 fail:
398         if (irqfd->resampler)
399                 irqfd_resampler_shutdown(irqfd);
400
401         if (resamplefd && !IS_ERR(resamplefd))
402                 eventfd_ctx_put(resamplefd);
403
404         if (eventfd && !IS_ERR(eventfd))
405                 eventfd_ctx_put(eventfd);
406
407         fdput(f);
408
409 out:
410         kfree(irqfd);
411         return ret;
412 }
413
414 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
415 {
416         struct kvm_irq_ack_notifier *kian;
417         int gsi, idx;
418
419         idx = srcu_read_lock(&kvm->irq_srcu);
420         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
421         if (gsi != -1)
422                 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
423                                          link)
424                         if (kian->gsi == gsi) {
425                                 srcu_read_unlock(&kvm->irq_srcu, idx);
426                                 return true;
427                         }
428
429         srcu_read_unlock(&kvm->irq_srcu, idx);
430
431         return false;
432 }
433 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
434
435 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
436 {
437         struct kvm_irq_ack_notifier *kian;
438         int gsi, idx;
439
440         trace_kvm_ack_irq(irqchip, pin);
441
442         idx = srcu_read_lock(&kvm->irq_srcu);
443         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
444         if (gsi != -1)
445                 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
446                                          link)
447                         if (kian->gsi == gsi)
448                                 kian->irq_acked(kian);
449         srcu_read_unlock(&kvm->irq_srcu, idx);
450 }
451
452 void kvm_register_irq_ack_notifier(struct kvm *kvm,
453                                    struct kvm_irq_ack_notifier *kian)
454 {
455         mutex_lock(&kvm->irq_lock);
456         hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
457         mutex_unlock(&kvm->irq_lock);
458         kvm_vcpu_request_scan_ioapic(kvm);
459 }
460
461 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
462                                     struct kvm_irq_ack_notifier *kian)
463 {
464         mutex_lock(&kvm->irq_lock);
465         hlist_del_init_rcu(&kian->link);
466         mutex_unlock(&kvm->irq_lock);
467         synchronize_srcu(&kvm->irq_srcu);
468         kvm_vcpu_request_scan_ioapic(kvm);
469 }
470 #endif
471
472 void
473 kvm_eventfd_init(struct kvm *kvm)
474 {
475 #ifdef CONFIG_HAVE_KVM_IRQFD
476         spin_lock_init(&kvm->irqfds.lock);
477         INIT_LIST_HEAD(&kvm->irqfds.items);
478         INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
479         mutex_init(&kvm->irqfds.resampler_lock);
480 #endif
481         INIT_LIST_HEAD(&kvm->ioeventfds);
482 }
483
484 #ifdef CONFIG_HAVE_KVM_IRQFD
485 /*
486  * Shut down any irqfds that match fd+gsi
487  */
488 static int
489 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
490 {
491         struct kvm_kernel_irqfd *irqfd, *tmp;
492         struct eventfd_ctx *eventfd;
493
494         eventfd = eventfd_ctx_fdget(args->fd);
495         if (IS_ERR(eventfd))
496                 return PTR_ERR(eventfd);
497
498         spin_lock_irq(&kvm->irqfds.lock);
499
500         list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
501                 if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
502                         /*
503                          * This clearing of irq_entry.type is needed for when
504                          * another thread calls kvm_irq_routing_update before
505                          * we flush the workqueue below (we synchronize with
506                          * kvm_irq_routing_update using irqfds.lock).
507                          */
508                         write_seqcount_begin(&irqfd->irq_entry_sc);
509                         irqfd->irq_entry.type = 0;
510                         write_seqcount_end(&irqfd->irq_entry_sc);
511                         irqfd_deactivate(irqfd);
512                 }
513         }
514
515         spin_unlock_irq(&kvm->irqfds.lock);
516         eventfd_ctx_put(eventfd);
517
518         /*
519          * Block until we know all outstanding shutdown jobs have completed
520          * so that we guarantee there will not be any more interrupts on this
521          * gsi once this deassign function returns.
522          */
523         flush_workqueue(irqfd_cleanup_wq);
524
525         return 0;
526 }
527
528 int
529 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
530 {
531         if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
532                 return -EINVAL;
533
534         if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
535                 return kvm_irqfd_deassign(kvm, args);
536
537         return kvm_irqfd_assign(kvm, args);
538 }
539
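/*
 * A minimal userspace sketch of driving this ioctl (illustrative only, not
 * part of this file).  "vm_fd" is assumed to be an existing VM file
 * descriptor and GSI 5 an arbitrary, already-routed interrupt.  Once
 * KVM_IRQFD succeeds, writing to the eventfd injects the interrupt;
 * KVM_IRQFD_FLAG_DEASSIGN with the same fd/gsi removes the binding again.
 */
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int example_irqfd_bind(int vm_fd)
{
        struct kvm_irqfd irqfd = {
                .fd  = eventfd(0, EFD_CLOEXEC), /* signalling side of the binding */
                .gsi = 5,                       /* guest interrupt to raise */
        };

        if (ioctl(vm_fd, KVM_IRQFD, &irqfd))    /* assign: flags == 0 */
                return -1;

        /* ... later: tear the binding down again */
        irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
        return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}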
540 /*
541  * This function is called as the kvm VM fd is being released.  Shut down
542  * all irqfds that still remain open.
543  */
544 void
545 kvm_irqfd_release(struct kvm *kvm)
546 {
547         struct kvm_kernel_irqfd *irqfd, *tmp;
548
549         spin_lock_irq(&kvm->irqfds.lock);
550
551         list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
552                 irqfd_deactivate(irqfd);
553
554         spin_unlock_irq(&kvm->irqfds.lock);
555
556         /*
557          * Block until we know all outstanding shutdown jobs have completed
558          * since we do not take a kvm* reference.
559          */
560         flush_workqueue(irqfd_cleanup_wq);
562 }
563
564 /*
565  * Take note of a change in irq routing.
566  * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
567  */
568 void kvm_irq_routing_update(struct kvm *kvm)
569 {
570         struct kvm_kernel_irqfd *irqfd;
571
572         spin_lock_irq(&kvm->irqfds.lock);
573
574         list_for_each_entry(irqfd, &kvm->irqfds.items, list)
575                 irqfd_update(kvm, irqfd);
576
577         spin_unlock_irq(&kvm->irqfds.lock);
578 }
579
580 /*
581  * create a host-wide workqueue for issuing deferred shutdown requests
582  * aggregated from all vm* instances. We need our own isolated single-thread
583  * queue to prevent deadlock against flushing the normal work-queue.
584  */
585 int kvm_irqfd_init(void)
586 {
587         irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
588         if (!irqfd_cleanup_wq)
589                 return -ENOMEM;
590
591         return 0;
592 }
593
594 void kvm_irqfd_exit(void)
595 {
596         destroy_workqueue(irqfd_cleanup_wq);
597 }
598 #endif
599
600 /*
601  * --------------------------------------------------------------------
602  * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
603  *
604  * userspace can register a PIO/MMIO address with an eventfd for receiving
605  * notification when the memory has been touched.
606  * --------------------------------------------------------------------
607  */
608
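/*
 * A minimal userspace sketch of registering such an address (illustrative
 * only, not part of this file).  "vm_fd", the MMIO address passed in and
 * the 4-byte data-match value 0x1 are all assumptions.  After the ioctl
 * succeeds, a guest write of 0x1 to that address signals the eventfd
 * instead of causing an exit to userspace.
 */
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int example_ioeventfd_bind(int vm_fd, __u64 mmio_addr)
{
        struct kvm_ioeventfd ioeventfd = {
                .datamatch = 0x1,                       /* only this value triggers */
                .addr      = mmio_addr,                 /* guest-physical MMIO address */
                .len       = 4,                         /* match 4-byte writes */
                .fd        = eventfd(0, EFD_CLOEXEC),
                .flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
        };

        return ioctl(vm_fd, KVM_IOEVENTFD, &ioeventfd);
}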
609 struct _ioeventfd {
610         struct list_head     list;
611         u64                  addr;
612         int                  length;
613         struct eventfd_ctx  *eventfd;
614         u64                  datamatch;
615         struct kvm_io_device dev;
616         u8                   bus_idx;
617         bool                 wildcard;
618 };
619
620 static inline struct _ioeventfd *
621 to_ioeventfd(struct kvm_io_device *dev)
622 {
623         return container_of(dev, struct _ioeventfd, dev);
624 }
625
626 static void
627 ioeventfd_release(struct _ioeventfd *p)
628 {
629         eventfd_ctx_put(p->eventfd);
630         list_del(&p->list);
631         kfree(p);
632 }
633
634 static bool
635 ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
636 {
637         u64 _val;
638
639         if (addr != p->addr)
640                 /* address must be precise for a hit */
641                 return false;
642
643         if (!p->length)
644                 /* length = 0 means only look at the address, so always a hit */
645                 return true;
646
647         if (len != p->length)
648                 /* address-range must be precise for a hit */
649                 return false;
650
651         if (p->wildcard)
652                 /* all else equal, wildcard is always a hit */
653                 return true;
654
655         /* otherwise, we have to actually compare the data */
656
657         BUG_ON(!IS_ALIGNED((unsigned long)val, len));
658
659         switch (len) {
660         case 1:
661                 _val = *(u8 *)val;
662                 break;
663         case 2:
664                 _val = *(u16 *)val;
665                 break;
666         case 4:
667                 _val = *(u32 *)val;
668                 break;
669         case 8:
670                 _val = *(u64 *)val;
671                 break;
672         default:
673                 return false;
674         }
675
676         return _val == p->datamatch;
677 }
678
679 /* MMIO/PIO writes trigger an event if the addr/val match */
680 static int
681 ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
682                 int len, const void *val)
683 {
684         struct _ioeventfd *p = to_ioeventfd(this);
685
686         if (!ioeventfd_in_range(p, addr, len, val))
687                 return -EOPNOTSUPP;
688
689         eventfd_signal(p->eventfd, 1);
690         return 0;
691 }
692
693 /*
694  * This function is called as KVM is completely shutting down.  We do not
695  * need to worry about locking; just nuke anything we have as quickly as possible.
696  */
697 static void
698 ioeventfd_destructor(struct kvm_io_device *this)
699 {
700         struct _ioeventfd *p = to_ioeventfd(this);
701
702         ioeventfd_release(p);
703 }
704
705 static const struct kvm_io_device_ops ioeventfd_ops = {
706         .write      = ioeventfd_write,
707         .destructor = ioeventfd_destructor,
708 };
709
710 /* assumes kvm->slots_lock held */
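/*
 * Two registrations collide when they share bus and address and their
 * length/datamatch constraints cannot tell the accesses apart (a
 * zero-length registration collides with any length at that address).
 */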
711 static bool
712 ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
713 {
714         struct _ioeventfd *_p;
715
716         list_for_each_entry(_p, &kvm->ioeventfds, list)
717                 if (_p->bus_idx == p->bus_idx &&
718                     _p->addr == p->addr &&
719                     (!_p->length || !p->length ||
720                      (_p->length == p->length &&
721                       (_p->wildcard || p->wildcard ||
722                        _p->datamatch == p->datamatch))))
723                         return true;
724
725         return false;
726 }
727
728 static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
729 {
730         if (flags & KVM_IOEVENTFD_FLAG_PIO)
731                 return KVM_PIO_BUS;
732         if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
733                 return KVM_VIRTIO_CCW_NOTIFY_BUS;
734         return KVM_MMIO_BUS;
735 }
736
737 static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
738                                 enum kvm_bus bus_idx,
739                                 struct kvm_ioeventfd *args)
740 {
741
742         struct eventfd_ctx *eventfd;
743         struct _ioeventfd *p;
744         int ret;
745
746         eventfd = eventfd_ctx_fdget(args->fd);
747         if (IS_ERR(eventfd))
748                 return PTR_ERR(eventfd);
749
750         p = kzalloc(sizeof(*p), GFP_KERNEL);
751         if (!p) {
752                 ret = -ENOMEM;
753                 goto fail;
754         }
755
756         INIT_LIST_HEAD(&p->list);
757         p->addr    = args->addr;
758         p->bus_idx = bus_idx;
759         p->length  = args->len;
760         p->eventfd = eventfd;
761
762         /* The datamatch feature is optional; otherwise this is a wildcard */
763         if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
764                 p->datamatch = args->datamatch;
765         else
766                 p->wildcard = true;
767
768         mutex_lock(&kvm->slots_lock);
769
770         /* Verify that there isn't a match already */
771         if (ioeventfd_check_collision(kvm, p)) {
772                 ret = -EEXIST;
773                 goto unlock_fail;
774         }
775
776         kvm_iodevice_init(&p->dev, &ioeventfd_ops);
777
778         ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
779                                       &p->dev);
780         if (ret < 0)
781                 goto unlock_fail;
782
783         kvm->buses[bus_idx]->ioeventfd_count++;
784         list_add_tail(&p->list, &kvm->ioeventfds);
785
786         mutex_unlock(&kvm->slots_lock);
787
788         return 0;
789
790 unlock_fail:
791         mutex_unlock(&kvm->slots_lock);
792
793 fail:
794         kfree(p);
795         eventfd_ctx_put(eventfd);
796
797         return ret;
798 }
799
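/*
 * Remove the ioeventfd on this bus that matches fd, address, length and
 * (unless it is a wildcard) datamatch; returns -ENOENT if none matches.
 */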
800 static int
801 kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
802                            struct kvm_ioeventfd *args)
803 {
804         struct _ioeventfd        *p, *tmp;
805         struct eventfd_ctx       *eventfd;
806         int                       ret = -ENOENT;
807
808         eventfd = eventfd_ctx_fdget(args->fd);
809         if (IS_ERR(eventfd))
810                 return PTR_ERR(eventfd);
811
812         mutex_lock(&kvm->slots_lock);
813
814         list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
815                 bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
816
817                 if (p->bus_idx != bus_idx ||
818                     p->eventfd != eventfd  ||
819                     p->addr != args->addr  ||
820                     p->length != args->len ||
821                     p->wildcard != wildcard)
822                         continue;
823
824                 if (!p->wildcard && p->datamatch != args->datamatch)
825                         continue;
826
827                 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
828                 kvm->buses[bus_idx]->ioeventfd_count--;
829                 ioeventfd_release(p);
830                 ret = 0;
831                 break;
832         }
833
834         mutex_unlock(&kvm->slots_lock);
835
836         eventfd_ctx_put(eventfd);
837
838         return ret;
839 }
840
841 static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
842 {
843         enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
844         int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
845
846         if (!args->len && bus_idx == KVM_MMIO_BUS)
847                 kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
848
849         return ret;
850 }
851
852 static int
853 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
854 {
855         enum kvm_bus              bus_idx;
856         int ret;
857
858         bus_idx = ioeventfd_bus_from_flags(args->flags);
859         /* must be a natural word size, or 0 to ignore the length */
860         switch (args->len) {
861         case 0:
862         case 1:
863         case 2:
864         case 4:
865         case 8:
866                 break;
867         default:
868                 return -EINVAL;
869         }
870
871         /* check for range overflow */
872         if (args->addr + args->len < args->addr)
873                 return -EINVAL;
874
875         /* check for extra flags that we don't understand */
876         if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
877                 return -EINVAL;
878
879         /* ioeventfd with no length can't be combined with DATAMATCH */
880         if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
881                 return -EINVAL;
882
883         ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
884         if (ret)
885                 goto fail;
886
887         /* When length is ignored, MMIO is also put on a separate bus, for
888          * faster lookups.
889          */
890         if (!args->len && bus_idx == KVM_MMIO_BUS) {
891                 ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
892                 if (ret < 0)
893                         goto fast_fail;
894         }
895
896         return 0;
897
898 fast_fail:
899         kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
900 fail:
901         return ret;
902 }
903
904 int
905 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
906 {
907         if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
908                 return kvm_deassign_ioeventfd(kvm, args);
909
910         return kvm_assign_ioeventfd(kvm, args);
911 }