// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}

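/*
 * Wire the vq's call eventfd to the device vq interrupt through the
 * irq bypass framework, so that a registered consumer (e.g. KVM's
 * posted-interrupt support) can signal the guest directly without a
 * trip through the host eventfd path.
 */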
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0) {
		spin_unlock(&vq->call_ctx.ctx_lock);
		return;
	}

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	spin_lock(&vq->call_ctx.ctx_lock);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	spin_unlock(&vq->call_ctx.ctx_lock);
}

static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

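/*
 * Validate and forward a status write to the device. Crossing the
 * DRIVER_OK boundary also sets up or tears down the per-vq irq
 * bypass producers, since vq interrupts only matter while the
 * device is running.
 */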
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless resetting
	 * the status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}

static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	long size = 0;

	switch (v->virtio_id) {
	case VIRTIO_ID_NET:
		size = sizeof(struct virtio_net_config);
		break;
	}

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, c->buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}

static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

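/*
 * Vring ioctls are served in two steps: vdpa-specific commands are
 * handled directly via the config ops; everything else goes through
 * the generic vhost_vring_ioctl() first, and the result is then
 * propagated to the device (address, state, callback or size).
 */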
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u64 __user *featurep = argp;
	u64 features;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.avail_index;
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_BACKEND_FEATURES:
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}

static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	long r;

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}

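/*
 * Mirror a new IOTLB entry into the device using whichever
 * translation interface it provides: a per-range dma_map op, a
 * whole-table set_map op (deferred while an IOTLB batch is in
 * flight), or, failing both, the platform IOMMU domain managed by
 * this driver.
 */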
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	return r;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}

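/*
 * Handle a VHOST_IOTLB_UPDATE message: pin the userspace range
 * (accounted against RLIMIT_MEMLOCK) and map each physically
 * contiguous run of pinned pages as a single IOTLB entry.
 */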
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long locked, lock_limit, pinned, i;
	u64 iova = msg->iova;
	int ret = 0;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	/* Reject empty ranges before allocating the page list */
	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages)
		return -EINVAL;

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	mmap_read_lock(dev->mm);

	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (locked > lock_limit) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;

	while (npages) {
		pinned = min_t(unsigned long, npages, list_size);
		ret = pin_user_pages(cur_base, pinned,
				     gup_flags, page_list, NULL);
		if (ret != pinned)
			goto out;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < ret; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous chunk seen so far */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				if (vhost_vdpa_map(v, iova, csize,
						   map_pfn << PAGE_SHIFT,
						   msg->perm))
					goto out;
				map_pfn = this_pfn;
				iova += csize;
			}

			last_pfn = this_pfn;
		}

		cur_base += ret << PAGE_SHIFT;
		npages -= ret;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		atomic64_sub(npages, &dev->mm->pinned_vm);
	}
	mmap_read_unlock(dev->mm);
	free_page((unsigned long)page_list);
	return ret;
}

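/*
 * Dispatch IOTLB messages written to the char device. BATCH_BEGIN
 * defers the set_map() device update until BATCH_END, so a burst of
 * updates is committed to the device in one shot.
 */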
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_dev_check_owner(dev);
	if (r)
		return r;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

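/*
 * If the device does not translate DMA by itself (neither dma_map
 * nor set_map is provided), allocate and attach a platform IOMMU
 * domain so the mappings can be programmed with iommu_map().
 */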
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device wants to do DMA translation by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < v->nvqs; i++) {
		vq = &v->vqs[i];
		if (vq->call_ctx.producer.irq)
			irq_bypass_unregister_producer(&vq->call_ctx.producer);
	}
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}

#ifdef CONFIG_MMU
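/*
 * Lazily establish the doorbell mapping on first access: look up the
 * notification area of the vq selected by vm_pgoff and remap its PFN,
 * non-cached, into the faulting page of the VMA.
 */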
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/*
	 * To be safe and easily modelled by userspace, we only
	 * support a doorbell which sits on a page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}

static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	/* Currently, we only accept network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");