Commit | Line | Data |
---|---|---|
4c8cf318 TB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2018-2020 Intel Corporation. | |
4 | * Copyright (C) 2020 Red Hat, Inc. | |
5 | * | |
6 | * Author: Tiwei Bie <tiwei.bie@intel.com> | |
7 | * Jason Wang <jasowang@redhat.com> | |
8 | * | |
9 | * Thanks Michael S. Tsirkin for the valuable comments and | |
10 | * suggestions. And thanks to Cunming Liang and Zhihong Wang for all | |
11 | * their supports. | |
12 | */ | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/cdev.h> | |
17 | #include <linux/device.h> | |
ddd89d0a | 18 | #include <linux/mm.h> |
9d6d97bf | 19 | #include <linux/slab.h> |
4c8cf318 TB |
20 | #include <linux/iommu.h> |
21 | #include <linux/uuid.h> | |
22 | #include <linux/vdpa.h> | |
23 | #include <linux/nospec.h> | |
24 | #include <linux/vhost.h> | |
4c8cf318 TB |
25 | |
26 | #include "vhost.h" | |
27 | ||
/*
 * Backend features always advertised by vhost-vdpa: v2 IOTLB update
 * messages, batched IOTLB updates and per-address-space (ASID) IOTLB.
 */
enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};
34 | ||
4c8cf318 TB |
/* Maximum number of vhost-vdpa char devices (one per minor number). */
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

/* Bucket count of the per-device ASID -> address space hash table. */
#define VHOST_VDPA_IOTLB_BUCKETS 16

/* One IOTLB address space, keyed by ASID, linked into vhost_vdpa->as[]. */
struct vhost_vdpa_as {
	struct hlist_node hash_link;	/* entry in vhost_vdpa->as[] bucket */
	struct vhost_iotlb iotlb;	/* mappings owned by this address space */
	u32 id;				/* the ASID */
};
44 | ||
4c8cf318 TB |
/*
 * Per-device vhost-vdpa state bridging a vhost char device to an
 * underlying vDPA device.
 */
struct vhost_vdpa {
	struct vhost_dev vdev;		/* embedded vhost device */
	struct iommu_domain *domain;	/* platform IOMMU domain (used when the
					 * parent has neither dma_map nor set_map) */
	struct vhost_virtqueue *vqs;	/* array of nvqs virtqueues */
	struct completion completion;	/* presumably signalled on release — not visible here */
	struct vdpa_device *vdpa;	/* backing vDPA device */
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; /* ASID hash table */
	struct device dev;
	struct cdev cdev;
	atomic_t opened;		/* NOTE(review): likely enforces exclusive open; confirm in open() */
	u32 nvqs;			/* number of entries in vqs[] */
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;	/* config-change notification eventfd */
	int in_batch;			/* nonzero while inside an IOTLB batch update */
	struct vdpa_iova_range range;	/* valid IOVA range reported to userspace */
	u32 batch_asid;			/* ASID of the in-flight batch — set elsewhere */
	bool suspended;			/* set by VHOST_VDPA_SUSPEND, cleared on resume/reset */
};
64 | ||
65 | static DEFINE_IDA(vhost_vdpa_ida); | |
66 | ||
67 | static dev_t vhost_vdpa_major; | |
68 | ||
c070c191 | 69 | static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, |
e794070a CL |
70 | struct vhost_iotlb *iotlb, u64 start, |
71 | u64 last, u32 asid); | |
c070c191 | 72 | |
aaca8373 GD |
73 | static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) |
74 | { | |
75 | struct vhost_vdpa_as *as = container_of(iotlb, struct | |
76 | vhost_vdpa_as, iotlb); | |
77 | return as->id; | |
78 | } | |
79 | ||
3d569879 GD |
80 | static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) |
81 | { | |
82 | struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; | |
83 | struct vhost_vdpa_as *as; | |
84 | ||
85 | hlist_for_each_entry(as, head, hash_link) | |
86 | if (as->id == asid) | |
87 | return as; | |
88 | ||
89 | return NULL; | |
90 | } | |
91 | ||
aaca8373 GD |
92 | static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) |
93 | { | |
94 | struct vhost_vdpa_as *as = asid_to_as(v, asid); | |
95 | ||
96 | if (!as) | |
97 | return NULL; | |
98 | ||
99 | return &as->iotlb; | |
100 | } | |
101 | ||
3d569879 GD |
102 | static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) |
103 | { | |
104 | struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; | |
105 | struct vhost_vdpa_as *as; | |
106 | ||
107 | if (asid_to_as(v, asid)) | |
108 | return NULL; | |
109 | ||
aaca8373 GD |
110 | if (asid >= v->vdpa->nas) |
111 | return NULL; | |
112 | ||
3d569879 GD |
113 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
114 | if (!as) | |
115 | return NULL; | |
116 | ||
117 | vhost_iotlb_init(&as->iotlb, 0, 0); | |
118 | as->id = asid; | |
119 | hlist_add_head(&as->hash_link, head); | |
120 | ||
121 | return as; | |
122 | } | |
123 | ||
aaca8373 GD |
124 | static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, |
125 | u32 asid) | |
126 | { | |
127 | struct vhost_vdpa_as *as = asid_to_as(v, asid); | |
128 | ||
129 | if (as) | |
130 | return as; | |
131 | ||
132 | return vhost_vdpa_alloc_as(v, asid); | |
133 | } | |
134 | ||
1d0f874b SWL |
135 | static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) |
136 | { | |
137 | struct vdpa_device *vdpa = v->vdpa; | |
138 | const struct vdpa_config_ops *ops = vdpa->config; | |
139 | ||
140 | if (ops->reset_map) | |
141 | ops->reset_map(vdpa, asid); | |
142 | } | |
143 | ||
3d569879 GD |
/*
 * Tear down the address space bound to @asid: unlink it, unmap its
 * whole IOVA range, give the parent a chance to reset vendor IOMMU
 * state, then free it.  Returns -EINVAL when the ASID is unknown.
 */
static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	/* 0ULL - 1 == U64_MAX: unmap the entire IOVA range. */
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	/*
	 * Devices with vendor specific IOMMU may need to restore
	 * iotlb to the initial or default state, which cannot be
	 * cleaned up in the all range unmap call above. Give them
	 * a chance to clean up or reset the map to the desired
	 * state.
	 */
	vhost_vdpa_reset_map(v, asid);
	kfree(as);

	return 0;
}
165 | ||
4c8cf318 TB |
166 | static void handle_vq_kick(struct vhost_work *work) |
167 | { | |
168 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, | |
169 | poll.work); | |
170 | struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev); | |
171 | const struct vdpa_config_ops *ops = v->vdpa->config; | |
172 | ||
173 | ops->kick_vq(v->vdpa, vq - v->vqs); | |
174 | } | |
175 | ||
176 | static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) | |
177 | { | |
178 | struct vhost_virtqueue *vq = private; | |
265a0ad8 | 179 | struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; |
4c8cf318 TB |
180 | |
181 | if (call_ctx) | |
3652117f | 182 | eventfd_signal(call_ctx); |
4c8cf318 TB |
183 | |
184 | return IRQ_HANDLED; | |
185 | } | |
186 | ||
776f3950 ZL |
187 | static irqreturn_t vhost_vdpa_config_cb(void *private) |
188 | { | |
189 | struct vhost_vdpa *v = private; | |
190 | struct eventfd_ctx *config_ctx = v->config_ctx; | |
191 | ||
192 | if (config_ctx) | |
3652117f | 193 | eventfd_signal(config_ctx); |
776f3950 ZL |
194 | |
195 | return IRQ_HANDLED; | |
196 | } | |
197 | ||
2cf1ba9a ZL |
/*
 * Register the vq's call eventfd as an IRQ bypass producer for the
 * parent device's vq interrupt.  Silently does nothing when the parent
 * has no get_vq_irq op, reports no valid IRQ, or no call eventfd is set.
 */
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	/* Drop any stale producer before (re)registering below. */
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	/* Registration failure is non-fatal: interrupts still work, only
	 * the bypass fast path is lost, so just log it. */
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}
223 | ||
224 | static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) | |
225 | { | |
226 | struct vhost_virtqueue *vq = &v->vqs[qid]; | |
227 | ||
2cf1ba9a | 228 | irq_bypass_unregister_producer(&vq->call_ctx.producer); |
2cf1ba9a ZL |
229 | } |
230 | ||
bc91df5c | 231 | static int _compat_vdpa_reset(struct vhost_vdpa *v) |
4c8cf318 TB |
232 | { |
233 | struct vdpa_device *vdpa = v->vdpa; | |
bc91df5c | 234 | u32 flags = 0; |
4c8cf318 | 235 | |
c7e19440 DT |
236 | v->suspended = false; |
237 | ||
bc91df5c SWL |
238 | if (v->vdev.vqs) { |
239 | flags |= !vhost_backend_has_feature(v->vdev.vqs[0], | |
240 | VHOST_BACKEND_F_IOTLB_PERSIST) ? | |
241 | VDPA_RESET_F_CLEAN_MAP : 0; | |
242 | } | |
243 | ||
244 | return vdpa_reset(vdpa, flags); | |
245 | } | |
7f05630d | 246 | |
bc91df5c SWL |
247 | static int vhost_vdpa_reset(struct vhost_vdpa *v) |
248 | { | |
249 | v->in_batch = 0; | |
250 | return _compat_vdpa_reset(v); | |
4c8cf318 TB |
251 | } |
252 | ||
9067de47 SG |
253 | static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) |
254 | { | |
255 | struct vdpa_device *vdpa = v->vdpa; | |
256 | const struct vdpa_config_ops *ops = vdpa->config; | |
257 | ||
258 | if (!vdpa->use_va || !ops->bind_mm) | |
259 | return 0; | |
260 | ||
261 | return ops->bind_mm(vdpa, v->vdev.mm); | |
262 | } | |
263 | ||
264 | static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) | |
265 | { | |
266 | struct vdpa_device *vdpa = v->vdpa; | |
267 | const struct vdpa_config_ops *ops = vdpa->config; | |
268 | ||
269 | if (!vdpa->use_va || !ops->unbind_mm) | |
270 | return; | |
271 | ||
272 | ops->unbind_mm(vdpa); | |
273 | } | |
274 | ||
4c8cf318 TB |
275 | static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) |
276 | { | |
277 | struct vdpa_device *vdpa = v->vdpa; | |
278 | const struct vdpa_config_ops *ops = vdpa->config; | |
279 | u32 device_id; | |
280 | ||
281 | device_id = ops->get_device_id(vdpa); | |
282 | ||
283 | if (copy_to_user(argp, &device_id, sizeof(device_id))) | |
284 | return -EFAULT; | |
285 | ||
286 | return 0; | |
287 | } | |
288 | ||
289 | static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) | |
290 | { | |
291 | struct vdpa_device *vdpa = v->vdpa; | |
f6d955d8 | 292 | const struct vdpa_config_ops *ops = vdpa->config; |
4c8cf318 TB |
293 | u8 status; |
294 | ||
f6d955d8 | 295 | status = ops->get_status(vdpa); |
4c8cf318 TB |
296 | |
297 | if (copy_to_user(statusp, &status, sizeof(status))) | |
298 | return -EFAULT; | |
299 | ||
300 | return 0; | |
301 | } | |
302 | ||
/*
 * VHOST_VDPA_SET_STATUS: drive the virtio status state machine.
 * Only forward transitions are allowed except a reset to 0.  Vq irq
 * bypass producers are torn down before DRIVER_OK is cleared and set
 * up after DRIVER_OK becomes set.
 */
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless reset the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	/* Leaving DRIVER_OK: release the per-vq irq bypass producers. */
	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = _compat_vdpa_reset(v);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	/* Entering DRIVER_OK: wire up the per-vq irq bypass producers. */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}
341 | ||
/*
 * Validate a userspace config-space access: the (off, len) window must
 * be non-empty and lie entirely within the device config space.
 */
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	/* Written as a subtraction so off + len cannot overflow. */
	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}
356 | ||
357 | static long vhost_vdpa_get_config(struct vhost_vdpa *v, | |
358 | struct vhost_vdpa_config __user *c) | |
359 | { | |
360 | struct vdpa_device *vdpa = v->vdpa; | |
4c8cf318 TB |
361 | struct vhost_vdpa_config config; |
362 | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | |
363 | u8 *buf; | |
364 | ||
365 | if (copy_from_user(&config, c, size)) | |
366 | return -EFAULT; | |
367 | if (vhost_vdpa_config_validate(v, &config)) | |
368 | return -EINVAL; | |
369 | buf = kvzalloc(config.len, GFP_KERNEL); | |
370 | if (!buf) | |
371 | return -ENOMEM; | |
372 | ||
0d234007 | 373 | vdpa_get_config(vdpa, config.off, buf, config.len); |
4c8cf318 TB |
374 | |
375 | if (copy_to_user(c->buf, buf, config.len)) { | |
376 | kvfree(buf); | |
377 | return -EFAULT; | |
378 | } | |
379 | ||
380 | kvfree(buf); | |
381 | return 0; | |
382 | } | |
383 | ||
384 | static long vhost_vdpa_set_config(struct vhost_vdpa *v, | |
385 | struct vhost_vdpa_config __user *c) | |
386 | { | |
387 | struct vdpa_device *vdpa = v->vdpa; | |
4c8cf318 TB |
388 | struct vhost_vdpa_config config; |
389 | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | |
390 | u8 *buf; | |
391 | ||
392 | if (copy_from_user(&config, c, size)) | |
393 | return -EFAULT; | |
394 | if (vhost_vdpa_config_validate(v, &config)) | |
395 | return -EINVAL; | |
4c8cf318 | 396 | |
0ab4b890 TT |
397 | buf = vmemdup_user(c->buf, config.len); |
398 | if (IS_ERR(buf)) | |
399 | return PTR_ERR(buf); | |
4c8cf318 | 400 | |
6dbb1f16 | 401 | vdpa_set_config(vdpa, config.off, buf, config.len); |
4c8cf318 TB |
402 | |
403 | kvfree(buf); | |
404 | return 0; | |
405 | } | |
406 | ||
0723f1df EP |
407 | static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) |
408 | { | |
409 | struct vdpa_device *vdpa = v->vdpa; | |
410 | const struct vdpa_config_ops *ops = vdpa->config; | |
411 | ||
412 | return ops->suspend; | |
413 | } | |
414 | ||
69106b6f SB |
415 | static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) |
416 | { | |
417 | struct vdpa_device *vdpa = v->vdpa; | |
418 | const struct vdpa_config_ops *ops = vdpa->config; | |
419 | ||
420 | return ops->resume; | |
421 | } | |
422 | ||
7db0d602 SWL |
423 | static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v) |
424 | { | |
425 | struct vdpa_device *vdpa = v->vdpa; | |
426 | const struct vdpa_config_ops *ops = vdpa->config; | |
427 | ||
428 | return ops->get_vq_desc_group; | |
429 | } | |
430 | ||
4c8cf318 TB |
431 | static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) |
432 | { | |
433 | struct vdpa_device *vdpa = v->vdpa; | |
434 | const struct vdpa_config_ops *ops = vdpa->config; | |
435 | u64 features; | |
436 | ||
a64917bc | 437 | features = ops->get_device_features(vdpa); |
4c8cf318 TB |
438 | |
439 | if (copy_to_user(featurep, &features, sizeof(features))) | |
440 | return -EFAULT; | |
441 | ||
442 | return 0; | |
443 | } | |
444 | ||
b63e5c70 EP |
445 | static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) |
446 | { | |
447 | struct vdpa_device *vdpa = v->vdpa; | |
448 | const struct vdpa_config_ops *ops = vdpa->config; | |
449 | ||
450 | if (!ops->get_backend_features) | |
451 | return 0; | |
452 | else | |
453 | return ops->get_backend_features(vdpa); | |
454 | } | |
455 | ||
4398776f SWL |
/*
 * Whether mappings survive a device reset.  True when:
 *  - the parent uses the platform IOMMU (neither set_map nor dma_map), or
 *  - the parent can explicitly reset its map (reset_map), or
 *  - the parent advertises VHOST_BACKEND_F_IOTLB_PERSIST itself.
 */
static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
	       vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
}
464 | ||
4c8cf318 TB |
/*
 * VHOST_SET_FEATURES: negotiate driver features with the parent and
 * propagate the accepted set to every vhost virtqueue.  Rejected once
 * FEATURES_OK has been set (features are frozen after negotiation).
 */
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 actual_features;
	u64 features;
	int i;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	/* let the vqs know what has been configured */
	actual_features = ops->get_driver_features(vdpa);
	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		/* vq->mutex serializes against vq processing. */
		mutex_lock(&vq->mutex);
		vq->acked_features = actual_features;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}
499 | ||
500 | static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) | |
501 | { | |
502 | struct vdpa_device *vdpa = v->vdpa; | |
503 | const struct vdpa_config_ops *ops = vdpa->config; | |
504 | u16 num; | |
505 | ||
506 | num = ops->get_vq_num_max(vdpa); | |
507 | ||
508 | if (copy_to_user(argp, &num, sizeof(num))) | |
509 | return -EFAULT; | |
510 | ||
511 | return 0; | |
512 | } | |
513 | ||
776f3950 ZL |
514 | static void vhost_vdpa_config_put(struct vhost_vdpa *v) |
515 | { | |
f6bbf001 | 516 | if (v->config_ctx) { |
776f3950 | 517 | eventfd_ctx_put(v->config_ctx); |
f6bbf001 SG |
518 | v->config_ctx = NULL; |
519 | } | |
776f3950 ZL |
520 | } |
521 | ||
/*
 * VHOST_VDPA_SET_CONFIG_CALL: install (or clear, via VHOST_FILE_UNBIND)
 * the eventfd signalled on config-space changes, then register the
 * config callback with the parent.
 */
static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	/* ctx may be NULL (unbind) or an ERR_PTR from eventfd_ctx_fdget(). */
	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	/* After the swap, ctx holds the OLD context, v->config_ctx the new. */
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	/* A failed fdget leaves an ERR_PTR in config_ctx; clear it so the
	 * release path never calls eventfd_ctx_put() on an error pointer. */
	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}
2cf1ba9a | 550 | |
1b48dc03 JW |
551 | static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) |
552 | { | |
553 | struct vhost_vdpa_iova_range range = { | |
554 | .first = v->range.first, | |
555 | .last = v->range.last, | |
556 | }; | |
557 | ||
2c602741 DC |
558 | if (copy_to_user(argp, &range, sizeof(range))) |
559 | return -EFAULT; | |
560 | return 0; | |
1b48dc03 JW |
561 | } |
562 | ||
a61280dd L |
563 | static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) |
564 | { | |
565 | struct vdpa_device *vdpa = v->vdpa; | |
566 | const struct vdpa_config_ops *ops = vdpa->config; | |
567 | u32 size; | |
568 | ||
569 | size = ops->get_config_size(vdpa); | |
570 | ||
571 | if (copy_to_user(argp, &size, sizeof(size))) | |
572 | return -EFAULT; | |
573 | ||
574 | return 0; | |
575 | } | |
576 | ||
b04d910a L |
577 | static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) |
578 | { | |
579 | struct vdpa_device *vdpa = v->vdpa; | |
580 | ||
581 | if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) | |
582 | return -EFAULT; | |
583 | ||
584 | return 0; | |
585 | } | |
586 | ||
f345a014 EP |
587 | /* After a successful return of ioctl the device must not process more |
588 | * virtqueue descriptors. The device can answer to read or writes of config | |
589 | * fields as if it were not suspended. In particular, writing to "queue_enable" | |
590 | * with a value of 1 will not make the device start processing buffers. | |
591 | */ | |
592 | static long vhost_vdpa_suspend(struct vhost_vdpa *v) | |
593 | { | |
594 | struct vdpa_device *vdpa = v->vdpa; | |
595 | const struct vdpa_config_ops *ops = vdpa->config; | |
c7e19440 | 596 | int ret; |
f345a014 EP |
597 | |
598 | if (!ops->suspend) | |
599 | return -EOPNOTSUPP; | |
600 | ||
c7e19440 DT |
601 | ret = ops->suspend(vdpa); |
602 | if (!ret) | |
603 | v->suspended = true; | |
604 | ||
605 | return ret; | |
f345a014 EP |
606 | } |
607 | ||
3b688d7a SB |
608 | /* After a successful return of this ioctl the device resumes processing |
609 | * virtqueue descriptors. The device becomes fully operational the same way it | |
610 | * was before it was suspended. | |
611 | */ | |
612 | static long vhost_vdpa_resume(struct vhost_vdpa *v) | |
613 | { | |
614 | struct vdpa_device *vdpa = v->vdpa; | |
615 | const struct vdpa_config_ops *ops = vdpa->config; | |
c7e19440 | 616 | int ret; |
3b688d7a SB |
617 | |
618 | if (!ops->resume) | |
619 | return -EOPNOTSUPP; | |
620 | ||
c7e19440 DT |
621 | ret = ops->resume(vdpa); |
622 | if (!ret) | |
623 | v->suspended = false; | |
624 | ||
625 | return ret; | |
3b688d7a SB |
626 | } |
627 | ||
4c8cf318 TB |
/*
 * Per-vring ioctl handler.  Validates and sanitizes the vq index, then
 * handles vdpa-specific commands directly; generic vring commands are
 * routed through vhost_vring_ioctl() with pre/post processing that
 * syncs vq state with the parent device.
 */
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	/* Every vring ioctl payload starts with the u32 vq index. */
	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	/* Clamp speculation on the userspace-controlled index. */
	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_GET_VRING_DESC_GROUP:
		if (!vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_desc_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_GET_VRING_BASE:
		/* Pull current state from the device before the generic
		 * handler copies vq->last_avail_idx back to userspace. */
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			/* Packed ring: wrap counters live in bit 15. */
			vq->last_avail_idx = vq_state.packed.last_avail_idx |
					     (vq_state.packed.last_avail_counter << 15);
			vq->last_used_idx = vq_state.packed.last_used_idx |
					    (vq_state.packed.last_used_counter << 15);
		} else {
			vq->last_avail_idx = vq_state.split.avail_index;
		}
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	/* Post-processing: push the generic handler's result to the device. */
	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		/* Addresses may only change before DRIVER_OK or while suspended. */
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
			return -EINVAL;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
		} else {
			vq_state.split.avail_index = vq->last_avail_idx;
		}
		r = ops->set_vq_state(vdpa, idx, &vq_state);
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
			cb.trigger = vq->call_ctx.ctx;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
			cb.trigger = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		/* The call eventfd changed: re-register the irq bypass producer. */
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}
752 | ||
/*
 * Top-level ioctl dispatcher for the vhost-vdpa char device.
 * VHOST_SET_BACKEND_FEATURES is validated lock-free up front; all
 * other commands run under the vhost_dev mutex.  Unrecognized commands
 * fall through to the generic vhost handler, then to the vring handler.
 */
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		/* Reject any bit outside the supported backend-feature set. */
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
				 BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
			return -EOPNOTSUPP;
		/* Each optional feature must be backed by the parent device. */
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		     !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
		     !vhost_vdpa_can_resume(v))
			return -EOPNOTSUPP;
		/* DESC_ASID only makes sense on top of IOTLB_ASID. */
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		    !(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
			return -EINVAL;
		if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
		     !vhost_vdpa_has_desc_group(v))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
		     !vhost_vdpa_has_persistent_map(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		/* Dirty logging is not supported by vhost-vdpa. */
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		/* Base features plus whatever the parent can actually do. */
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (vhost_vdpa_can_resume(v))
			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
		if (vhost_vdpa_has_desc_group(v))
			features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
		if (vhost_vdpa_has_persistent_map(v))
			features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
		features |= vhost_vdpa_get_backend_features(v);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	case VHOST_VDPA_RESUME:
		r = vhost_vdpa_resume(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	if (r)
		goto out;

	switch (cmd) {
	case VHOST_SET_OWNER:
		/* For VA-based parents, bind the new owner's mm; undo
		 * ownership on failure so userspace can retry cleanly. */
		r = vhost_vdpa_bind_mm(v);
		if (r)
			vhost_dev_reset_owner(d, NULL);
		break;
	}
out:
	mutex_unlock(&d->mutex);
	return r;
}
e794070a CL |
885 | static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, |
886 | struct vhost_iotlb_map *map, u32 asid) | |
887 | { | |
888 | struct vdpa_device *vdpa = v->vdpa; | |
889 | const struct vdpa_config_ops *ops = vdpa->config; | |
890 | if (ops->dma_map) { | |
891 | ops->dma_unmap(vdpa, asid, map->start, map->size); | |
892 | } else if (ops->set_map == NULL) { | |
893 | iommu_unmap(v->domain, map->start, map->size); | |
894 | } | |
895 | } | |
4c8cf318 | 896 | |
e794070a CL |
/*
 * Unmap [start, last] from a PA-based IOTLB: for each mapping, dirty
 * and unpin every backing page, subtract it from the mm's pinned-page
 * accounting, remove it from the device and free the IOTLB entry.
 */
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			/* Writable mappings may have dirtied the page. */
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}
919 | ||
e794070a CL |
/*
 * Remove every VA-backed (use_va) mapping intersecting [start, last].
 *
 * Each mapping carries a struct vdpa_map_file in map->opaque holding a
 * reference on the backing file; drop that reference and free the
 * bookkeeping before tearing down the device mapping and iotlb entry.
 */
static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}
934 | ||
ae967246 | 935 | static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, |
e794070a CL |
936 | struct vhost_iotlb *iotlb, u64 start, |
937 | u64 last, u32 asid) | |
22af48cf | 938 | { |
d8945ec4 XY |
939 | struct vdpa_device *vdpa = v->vdpa; |
940 | ||
941 | if (vdpa->use_va) | |
e794070a | 942 | return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); |
d8945ec4 | 943 | |
e794070a | 944 | return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); |
22af48cf XY |
945 | } |
946 | ||
4c8cf318 TB |
947 | static int perm_to_iommu_flags(u32 perm) |
948 | { | |
949 | int flags = 0; | |
950 | ||
951 | switch (perm) { | |
952 | case VHOST_ACCESS_WO: | |
953 | flags |= IOMMU_WRITE; | |
954 | break; | |
955 | case VHOST_ACCESS_RO: | |
956 | flags |= IOMMU_READ; | |
957 | break; | |
958 | case VHOST_ACCESS_RW: | |
959 | flags |= (IOMMU_WRITE | IOMMU_READ); | |
960 | break; | |
961 | default: | |
962 | WARN(1, "invalidate vhost IOTLB permission\n"); | |
963 | break; | |
964 | } | |
965 | ||
966 | return flags | IOMMU_CACHE; | |
967 | } | |
968 | ||
ae967246 GD |
/*
 * Record a mapping of [iova, iova + size - 1] -> pa in @iotlb and install
 * it at the device or IOMMU level.
 *
 * Exactly one backend is used: the device's ->dma_map, the device's
 * ->set_map (deferred while a batch is in flight), or the platform IOMMU
 * domain.  On backend failure the iotlb entry is removed again.  For
 * PA-backed devices the pinned-page accounting in mm->pinned_vm is bumped
 * on success.  Returns 0 or a negative errno.
 */
static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		/* Inside a batch, set_map happens at VHOST_IOTLB_BATCH_END */
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm),
			      GFP_KERNEL_ACCOUNT);
	}
	if (r) {
		/* Keep iotlb consistent with the device on failure */
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}
1003 | ||
ae967246 GD |
1004 | static void vhost_vdpa_unmap(struct vhost_vdpa *v, |
1005 | struct vhost_iotlb *iotlb, | |
1006 | u64 iova, u64 size) | |
4c8cf318 | 1007 | { |
4c8cf318 TB |
1008 | struct vdpa_device *vdpa = v->vdpa; |
1009 | const struct vdpa_config_ops *ops = vdpa->config; | |
aaca8373 | 1010 | u32 asid = iotlb_to_asid(iotlb); |
4c8cf318 | 1011 | |
e794070a | 1012 | vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); |
4c8cf318 | 1013 | |
e794070a | 1014 | if (ops->set_map) { |
25abc060 | 1015 | if (!v->in_batch) |
aaca8373 | 1016 | ops->set_map(vdpa, asid, iotlb); |
25abc060 | 1017 | } |
c82729e0 | 1018 | |
4c8cf318 TB |
1019 | } |
1020 | ||
/*
 * Map a userspace VA range [uaddr, uaddr + size) at @iova for a use_va
 * device, walking the owner mm's VMAs under mmap_read_lock.
 *
 * Only file-backed VM_SHARED VMAs without VM_IO/VM_PFNMAP are mapped;
 * other VMAs in the range are silently skipped (the iova advances past
 * them).  Each mapped piece records its backing file (with a reference
 * taken) and file offset in a vdpa_map_file attached to the iotlb entry.
 * On any failure, everything mapped so far by this call is unwound.
 * Returns 0 or a negative errno.
 */
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		/* Clamp this piece to the end of the current VMA */
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
		      !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	/* Unwind everything this call managed to map before the failure */
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}
1071 | ||
/*
 * Pin the userspace range [uaddr, uaddr + size) and map it at @iova for a
 * PA-based device.
 *
 * Pages are pinned in page_list-sized batches (one page of bookkeeping)
 * under mmap_read_lock, after checking RLIMIT_MEMLOCK against the mm's
 * pinned_vm count.  Physically contiguous runs of pinned pages are merged
 * and mapped as single chunks via vhost_vdpa_map().  On failure, pages
 * pinned but not yet mapped are unpinned here (nchunks/last_pfn track the
 * outstanding run), while already-mapped pages are unpinned through
 * vhost_vdpa_unmap() of [start, size).  Returns 0 or a negative errno.
 */
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				/* Short pin: release the partial batch */
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Pin the rest chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;

}
1196 | ||
1197 | static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, | |
ae967246 | 1198 | struct vhost_iotlb *iotlb, |
22af48cf XY |
1199 | struct vhost_iotlb_msg *msg) |
1200 | { | |
d8945ec4 | 1201 | struct vdpa_device *vdpa = v->vdpa; |
22af48cf XY |
1202 | |
1203 | if (msg->iova < v->range.first || !msg->size || | |
1204 | msg->iova > U64_MAX - msg->size + 1 || | |
1205 | msg->iova + msg->size - 1 > v->range.last) | |
1206 | return -EINVAL; | |
1207 | ||
1208 | if (vhost_iotlb_itree_first(iotlb, msg->iova, | |
1209 | msg->iova + msg->size - 1)) | |
1210 | return -EEXIST; | |
1211 | ||
d8945ec4 | 1212 | if (vdpa->use_va) |
ae967246 | 1213 | return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, |
d8945ec4 XY |
1214 | msg->uaddr, msg->perm); |
1215 | ||
ae967246 | 1216 | return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, |
22af48cf | 1217 | msg->perm); |
4c8cf318 TB |
1218 | } |
1219 | ||
/*
 * Handle one vhost IOTLB message for address space @asid, under the
 * vhost device mutex.
 *
 * UPDATE and BATCH_BEGIN may lazily allocate the address space; other
 * message types require it to already exist.  A message whose asid does
 * not match an in-flight batch's asid is rejected.  BATCH_BEGIN/END
 * bracket a series of updates so set_map-style devices get a single
 * set_map call at batch end.  Returns 0 or a negative errno.
 */
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		/* These may create the address space on first use */
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		/* Flush the batched updates to the device in one set_map */
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}
1285 | ||
1286 | static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, | |
1287 | struct iov_iter *from) | |
1288 | { | |
1289 | struct file *file = iocb->ki_filp; | |
1290 | struct vhost_vdpa *v = file->private_data; | |
1291 | struct vhost_dev *dev = &v->vdev; | |
1292 | ||
1293 | return vhost_chr_write_iter(dev, from); | |
1294 | } | |
1295 | ||
/*
 * Allocate and attach a platform IOMMU domain for devices that do not
 * translate DMA themselves (no ->set_map and no ->dma_map).
 *
 * Requires the DMA device to be on a bus and IOMMU cache-coherency
 * capable (vhost-vdpa maps everything with IOMMU_CACHE).  On attach
 * failure the freshly allocated domain is freed and v->domain reset.
 * Returns 0 or a negative errno.
 */
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	const struct bus_type *bus;
	int ret;

	/* Device want to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
		dev_warn_once(&v->dev,
			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
		return -ENOTSUPP;
	}

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}
1333 | ||
1334 | static void vhost_vdpa_free_domain(struct vhost_vdpa *v) | |
1335 | { | |
1336 | struct vdpa_device *vdpa = v->vdpa; | |
1337 | struct device *dma_dev = vdpa_get_dma_dev(vdpa); | |
1338 | ||
1339 | if (v->domain) { | |
1340 | iommu_detach_device(v->domain, dma_dev); | |
1341 | iommu_domain_free(v->domain); | |
1342 | } | |
1343 | ||
1344 | v->domain = NULL; | |
1345 | } | |
1346 | ||
1b48dc03 JW |
1347 | static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) |
1348 | { | |
1349 | struct vdpa_iova_range *range = &v->range; | |
1b48dc03 JW |
1350 | struct vdpa_device *vdpa = v->vdpa; |
1351 | const struct vdpa_config_ops *ops = vdpa->config; | |
1352 | ||
1353 | if (ops->get_iova_range) { | |
1354 | *range = ops->get_iova_range(vdpa); | |
bc9a05ee CH |
1355 | } else if (v->domain && v->domain->geometry.force_aperture) { |
1356 | range->first = v->domain->geometry.aperture_start; | |
1357 | range->last = v->domain->geometry.aperture_end; | |
1b48dc03 JW |
1358 | } else { |
1359 | range->first = 0; | |
1360 | range->last = ULLONG_MAX; | |
1361 | } | |
1362 | } | |
1363 | ||
3d569879 GD |
/*
 * Tear down all per-open state: remove every allocated address space,
 * free the IOMMU domain, clean up the vhost device, and release the
 * virtqueue pointer array allocated in vhost_vdpa_open().
 */
static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	/* Guard against double-free on a later cleanup */
	v->vdev.vqs = NULL;
}
1380 | ||
4c8cf318 TB |
/*
 * open() handler for the vhost-vdpa chardev.
 *
 * Enforces single-open via the atomic 'opened' flag, resets the device,
 * allocates the virtqueue pointer array, initializes the vhost device
 * with vhost_vdpa_process_iotlb_msg as the IOTLB handler, and sets up
 * the IOMMU domain and IOVA range.  On failure everything is unwound and
 * the opened flag released.  Returns 0 or a negative errno.
 */
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	/* Only one opener at a time */
	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	/* Also frees vqs via v->vdev.vqs */
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}
1430 | ||
2cf1ba9a ZL |
1431 | static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) |
1432 | { | |
81d46d69 | 1433 | u32 i; |
2cf1ba9a | 1434 | |
4c050286 GD |
1435 | for (i = 0; i < v->nvqs; i++) |
1436 | vhost_vdpa_unsetup_vq_irq(v, i); | |
2cf1ba9a ZL |
1437 | } |
1438 | ||
4c8cf318 TB |
/*
 * release() handler for the vhost-vdpa chardev.
 *
 * Under the vhost device mutex: detach the file, drop vq IRQs, reset the
 * device, stop the vhost device, unbind the mm, release the config
 * eventfd, and free all per-open state.  Then clear the opened flag and
 * signal the completion that vhost_vdpa_remove() may be waiting on.
 */
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_unbind_mm(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	/* Wake a pending vhost_vdpa_remove() */
	complete(&v->completion);

	return 0;
}
1459 | ||
4b4e4867 | 1460 | #ifdef CONFIG_MMU |
ddd89d0a JW |
/*
 * Page-fault handler for a doorbell mapping created by vhost_vdpa_mmap():
 * look up the virtqueue's notification area (vq index is in vm_pgoff)
 * and remap the faulting page to it, uncached.
 */
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}
1480 | ||
/* VM operations for doorbell mappings: populate pages lazily on fault. */
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};
1484 | ||
/*
 * mmap() handler exposing a virtqueue doorbell to userspace.
 *
 * The vq index is passed in vm_pgoff.  The mapping must be exactly one
 * page, shared, and write-only; the device must report a notification
 * area that is page-aligned and matches the mapping size.  The actual
 * PFN insertion is deferred to vhost_vdpa_fault().  Returns 0 or a
 * negative errno.
 */
static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, We only
	 * support the doorbell which sits on the page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
4b4e4867 | 1518 | #endif /* CONFIG_MMU */ |
ddd89d0a | 1519 | |
4c8cf318 TB |
/* File operations for the /dev/vhost-vdpa-N character device. */
static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	/* Doorbell mapping needs MMU fault handling */
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};
1531 | ||
/*
 * Device release callback, invoked when the last reference to v->dev is
 * dropped: return the minor to the IDA and free the per-device memory.
 */
static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}
1541 | ||
/*
 * Bus probe: bind a vDPA device to the vhost-vdpa chardev interface.
 *
 * Devices relying on the platform IOMMU (no ->set_map/->dma_map) are
 * limited to a single group and a single address space.  Allocates the
 * vhost_vdpa instance and its minor, initializes the struct device and
 * cdev, registers the chardev, and sets up the per-ASID hash buckets.
 * On error after device_initialize(), put_device() runs
 * vhost_vdpa_release_dev() to free everything.  Returns 0 or a negative
 * errno.
 */
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/* We can't support platform IOMMU device with more than 1
	 * group or as
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	/* release callback frees minor, vqs and v */
	put_device(&v->dev);
	return r;
}
1607 | ||
/*
 * Bus remove: unregister the chardev, then wait for any current opener
 * to release (signalled via v->completion) before dropping the last
 * device reference.  Setting 'opened' here also blocks new opens.
 */
static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}
1624 | ||
/* vDPA bus driver binding every vDPA device to the vhost-vdpa chardev. */
static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};
1632 | ||
/*
 * Module init: reserve the chardev major/minor range, then register the
 * vDPA bus driver; unwinds the region reservation on driver failure.
 */
static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
1653 | module_init(vhost_vdpa_init); | |
1654 | ||
1655 | static void __exit vhost_vdpa_exit(void) | |
1656 | { | |
1657 | vdpa_unregister_driver(&vhost_vdpa_driver); | |
1658 | unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); | |
1659 | } | |
1660 | module_exit(vhost_vdpa_exit); | |
1661 | ||
1662 | MODULE_VERSION("0.0.1"); | |
1663 | MODULE_LICENSE("GPL v2"); | |
1664 | MODULE_AUTHOR("Intel Corporation"); | |
1665 | MODULE_DESCRIPTION("vDPA-based vhost backend for virtio"); |