Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
7e992d69 AM |
2 | /* |
3 | * VFIO generic eventfd code for IRQFD support. | |
4 | * Derived from drivers/vfio/pci/vfio_pci_intrs.c | |
5 | * | |
6 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | |
7 | * Author: Alex Williamson <alex.williamson@redhat.com> | |
7e992d69 AM |
8 | */ |
9 | ||
10 | #include <linux/vfio.h> | |
11 | #include <linux/eventfd.h> | |
12 | #include <linux/file.h> | |
71be3423 | 13 | #include <linux/module.h> |
7e992d69 | 14 | #include <linux/slab.h> |
e2d55709 | 15 | #include "vfio.h" |
71be3423 | 16 | |
/* Workqueue on which virqfd shutdown and flush-inject work is queued. */
static struct workqueue_struct *vfio_irqfd_cleanup_wq;

/* Held around every read and update of a caller-owned *pvirqfd pointer. */
static DEFINE_SPINLOCK(virqfd_lock);
7e992d69 | 19 | |
e2d55709 | 20 | int __init vfio_virqfd_init(void) |
7e992d69 AM |
21 | { |
22 | vfio_irqfd_cleanup_wq = | |
23 | create_singlethread_workqueue("vfio-irqfd-cleanup"); | |
24 | if (!vfio_irqfd_cleanup_wq) | |
25 | return -ENOMEM; | |
26 | ||
27 | return 0; | |
28 | } | |
29 | ||
e2d55709 | 30 | void vfio_virqfd_exit(void) |
7e992d69 AM |
31 | { |
32 | destroy_workqueue(vfio_irqfd_cleanup_wq); | |
33 | } | |
34 | ||
35 | static void virqfd_deactivate(struct virqfd *virqfd) | |
36 | { | |
37 | queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); | |
38 | } | |
39 | ||
ac6424b9 | 40 | static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) |
7e992d69 AM |
41 | { |
42 | struct virqfd *virqfd = container_of(wait, struct virqfd, wait); | |
3ad6f93e | 43 | __poll_t flags = key_to_poll(key); |
7e992d69 | 44 | |
a9a08845 | 45 | if (flags & EPOLLIN) { |
b1b397ae DW |
46 | u64 cnt; |
47 | eventfd_ctx_do_read(virqfd->eventfd, &cnt); | |
48 | ||
7e992d69 AM |
49 | /* An event has been signaled, call function */ |
50 | if ((!virqfd->handler || | |
51 | virqfd->handler(virqfd->opaque, virqfd->data)) && | |
52 | virqfd->thread) | |
53 | schedule_work(&virqfd->inject); | |
54 | } | |
55 | ||
a9a08845 | 56 | if (flags & EPOLLHUP) { |
7e992d69 AM |
57 | unsigned long flags; |
58 | spin_lock_irqsave(&virqfd_lock, flags); | |
59 | ||
60 | /* | |
61 | * The eventfd is closing, if the virqfd has not yet been | |
62 | * queued for release, as determined by testing whether the | |
63 | * virqfd pointer to it is still valid, queue it now. As | |
64 | * with kvm irqfds, we know we won't race against the virqfd | |
65 | * going away because we hold the lock to get here. | |
66 | */ | |
67 | if (*(virqfd->pvirqfd) == virqfd) { | |
68 | *(virqfd->pvirqfd) = NULL; | |
69 | virqfd_deactivate(virqfd); | |
70 | } | |
71 | ||
72 | spin_unlock_irqrestore(&virqfd_lock, flags); | |
73 | } | |
74 | ||
75 | return 0; | |
76 | } | |
77 | ||
78 | static void virqfd_ptable_queue_proc(struct file *file, | |
79 | wait_queue_head_t *wqh, poll_table *pt) | |
80 | { | |
81 | struct virqfd *virqfd = container_of(pt, struct virqfd, pt); | |
82 | add_wait_queue(wqh, &virqfd->wait); | |
83 | } | |
84 | ||
85 | static void virqfd_shutdown(struct work_struct *work) | |
86 | { | |
87 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); | |
88 | u64 cnt; | |
89 | ||
90 | eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); | |
91 | flush_work(&virqfd->inject); | |
92 | eventfd_ctx_put(virqfd->eventfd); | |
93 | ||
94 | kfree(virqfd); | |
95 | } | |
96 | ||
97 | static void virqfd_inject(struct work_struct *work) | |
98 | { | |
99 | struct virqfd *virqfd = container_of(work, struct virqfd, inject); | |
100 | if (virqfd->thread) | |
101 | virqfd->thread(virqfd->opaque, virqfd->data); | |
102 | } | |
103 | ||
b620ecbd AW |
104 | static void virqfd_flush_inject(struct work_struct *work) |
105 | { | |
106 | struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); | |
107 | ||
108 | flush_work(&virqfd->inject); | |
109 | } | |
110 | ||
7e992d69 AM |
111 | int vfio_virqfd_enable(void *opaque, |
112 | int (*handler)(void *, void *), | |
113 | void (*thread)(void *, void *), | |
114 | void *data, struct virqfd **pvirqfd, int fd) | |
115 | { | |
116 | struct fd irqfd; | |
117 | struct eventfd_ctx *ctx; | |
118 | struct virqfd *virqfd; | |
119 | int ret = 0; | |
e6c8adca | 120 | __poll_t events; |
7e992d69 | 121 | |
0886196c | 122 | virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT); |
7e992d69 AM |
123 | if (!virqfd) |
124 | return -ENOMEM; | |
125 | ||
126 | virqfd->pvirqfd = pvirqfd; | |
127 | virqfd->opaque = opaque; | |
128 | virqfd->handler = handler; | |
129 | virqfd->thread = thread; | |
130 | virqfd->data = data; | |
131 | ||
132 | INIT_WORK(&virqfd->shutdown, virqfd_shutdown); | |
133 | INIT_WORK(&virqfd->inject, virqfd_inject); | |
b620ecbd | 134 | INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); |
7e992d69 AM |
135 | |
136 | irqfd = fdget(fd); | |
137 | if (!irqfd.file) { | |
138 | ret = -EBADF; | |
139 | goto err_fd; | |
140 | } | |
141 | ||
142 | ctx = eventfd_ctx_fileget(irqfd.file); | |
143 | if (IS_ERR(ctx)) { | |
144 | ret = PTR_ERR(ctx); | |
145 | goto err_ctx; | |
146 | } | |
147 | ||
148 | virqfd->eventfd = ctx; | |
149 | ||
150 | /* | |
151 | * virqfds can be released by closing the eventfd or directly | |
152 | * through ioctl. These are both done through a workqueue, so | |
153 | * we update the pointer to the virqfd under lock to avoid | |
154 | * pushing multiple jobs to release the same virqfd. | |
155 | */ | |
156 | spin_lock_irq(&virqfd_lock); | |
157 | ||
158 | if (*pvirqfd) { | |
159 | spin_unlock_irq(&virqfd_lock); | |
160 | ret = -EBUSY; | |
161 | goto err_busy; | |
162 | } | |
163 | *pvirqfd = virqfd; | |
164 | ||
165 | spin_unlock_irq(&virqfd_lock); | |
166 | ||
167 | /* | |
168 | * Install our own custom wake-up handling so we are notified via | |
169 | * a callback whenever someone signals the underlying eventfd. | |
170 | */ | |
171 | init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); | |
172 | init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); | |
173 | ||
9965ed17 | 174 | events = vfs_poll(irqfd.file, &virqfd->pt); |
7e992d69 AM |
175 | |
176 | /* | |
177 | * Check if there was an event already pending on the eventfd | |
178 | * before we registered and trigger it as if we didn't miss it. | |
179 | */ | |
a9a08845 | 180 | if (events & EPOLLIN) { |
7e992d69 AM |
181 | if ((!handler || handler(opaque, data)) && thread) |
182 | schedule_work(&virqfd->inject); | |
183 | } | |
184 | ||
185 | /* | |
186 | * Do not drop the file until the irqfd is fully initialized, | |
a9a08845 | 187 | * otherwise we might race against the EPOLLHUP. |
7e992d69 AM |
188 | */ |
189 | fdput(irqfd); | |
190 | ||
191 | return 0; | |
192 | err_busy: | |
193 | eventfd_ctx_put(ctx); | |
194 | err_ctx: | |
195 | fdput(irqfd); | |
196 | err_fd: | |
197 | kfree(virqfd); | |
198 | ||
199 | return ret; | |
200 | } | |
201 | EXPORT_SYMBOL_GPL(vfio_virqfd_enable); | |
202 | ||
203 | void vfio_virqfd_disable(struct virqfd **pvirqfd) | |
204 | { | |
205 | unsigned long flags; | |
206 | ||
207 | spin_lock_irqsave(&virqfd_lock, flags); | |
208 | ||
209 | if (*pvirqfd) { | |
210 | virqfd_deactivate(*pvirqfd); | |
211 | *pvirqfd = NULL; | |
212 | } | |
213 | ||
214 | spin_unlock_irqrestore(&virqfd_lock, flags); | |
215 | ||
216 | /* | |
217 | * Block until we know all outstanding shutdown jobs have completed. | |
218 | * Even if we don't queue the job, flush the wq to be sure it's | |
219 | * been released. | |
220 | */ | |
221 | flush_workqueue(vfio_irqfd_cleanup_wq); | |
222 | } | |
223 | EXPORT_SYMBOL_GPL(vfio_virqfd_disable); | |
b620ecbd AW |
224 | |
225 | void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) | |
226 | { | |
227 | unsigned long flags; | |
228 | ||
229 | spin_lock_irqsave(&virqfd_lock, flags); | |
230 | if (*pvirqfd && (*pvirqfd)->thread) | |
231 | queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); | |
232 | spin_unlock_irqrestore(&virqfd_lock, flags); | |
233 | ||
234 | flush_workqueue(vfio_irqfd_cleanup_wq); | |
235 | } | |
236 | EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); |