drm/i915/gvt: Fix possible recursive locking issue
drivers/gpu/drm/i915/gvt/kvmgt.c
/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT	40
#define VFIO_PCI_OFFSET_TO_INDEX(off)	(off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

struct vfio_region {
	u32 type;
	u32 subtype;
	size_t size;
	u32 flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	unsigned long iova;
	struct list_head list;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
		unsigned long *iova)
{
	struct page *page;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (unlikely(!pfn_valid(pfn)))
		return -EFAULT;

	page = pfn_to_page(pfn);
	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
			PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	*iova = (unsigned long)(daddr >> PAGE_SHIFT);
	return 0;
}

static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	daddr = (dma_addr_t)(iova << PAGE_SHIFT);
	dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}

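/*
 * Per-vGPU cache of guest pages pinned for DMA: an rbtree keyed by guest
 * frame number (gfn) whose entries record the host iova the page was
 * DMA-mapped to. Lookups and updates are serialized by vdev.cache_lock.
 */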
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}

static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	unsigned long iova;

	mutex_lock(&vgpu->vdev.cache_lock);

	entry = __gvt_cache_find(vgpu, gfn);
	iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;

	mutex_unlock(&vgpu->vdev.cache_lock);
	return iova;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		unsigned long iova)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->iova = iova;
	INIT_LIST_HEAD(&new->list);

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

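/*
 * Calling vfio_unpin_pages() directly from the VFIO DMA-unmap notifier can
 * take locks already held on that path (the possible recursive locking this
 * patch addresses, see the subject above). gvt_cache_mark_remove() therefore
 * only detaches an entry from the cache and queues it on vdev.unpin_list;
 * this worker later drains the list and performs the actual unpinning.
 */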
static void intel_vgpu_unpin_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
			vdev.unpin_work);
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long gfn;

	for (;;) {
		spin_lock(&vgpu->vdev.unpin_lock);
		if (list_empty(&vgpu->vdev.unpin_list)) {
			spin_unlock(&vgpu->vdev.unpin_lock);
			break;
		}
		this = list_first_entry(&vgpu->vdev.unpin_list,
				struct gvt_dma, list);
		list_del(&this->list);
		spin_unlock(&vgpu->vdev.unpin_lock);

		gfn = this->gfn;
		vfio_unpin_pages(dev, &gfn, 1);
		kfree(this);
	}
}

static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *this;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return false;
	}
	gvt_dma_unmap_iova(vgpu, this->iova);
	/* remove this from rb tree */
	rb_erase(&this->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);

	/* put this to the unpin_list */
	spin_lock(&vgpu->vdev.unpin_lock);
	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
	spin_unlock(&vgpu->vdev.unpin_lock);

	return true;
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	mutex_lock(&vgpu->vdev.cache_lock);
	while ((node = rb_first(&vgpu->vdev.cache))) {
		dma = rb_entry(node, struct gvt_dma, node);
		gvt_dma_unmap_iova(vgpu, dma->iova);
		gfn = dma->gfn;

		vfio_unpin_pages(dev, &gfn, 1);
		__gvt_cache_remove_entry(vgpu, dma);
	}
	mutex_unlock(&vgpu->vdev.cache_lock);
}

static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static ssize_t available_instances_show(struct kobject *kobj,
		struct device *dev, char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}

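/*
 * Hash table of guest frame numbers that are currently write-protected
 * through the KVM page-track interface; kvmgt_page_track_write() consults
 * it to decide whether a tracked guest write should be emulated as an
 * MMIO write.
 */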
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
	spin_lock_init(&vgpu->vdev.unpin_lock);
	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);
	bool sched_unmap = false;

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);

		if (sched_unmap)
			schedule_work(&vgpu->vdev.unpin_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}

static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
	u32 start_lo, start_hi;
	u32 mem_type;
	int pos = PCI_BASE_ADDRESS_0;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ pos + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;


	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
	case VFIO_PCI_BAR1_REGION_INDEX:
		if (is_write) {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_write(vgpu,
						bar0_start + pos, buf, count);
		} else {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_read(vgpu,
						bar0_start + pos, buf, count);
		}
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_vgpu_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff = 0;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	}

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;

			info.flags = 0;
			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type;

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
						VFIO_REGION_INFO_CAP_TYPE,
						&cap_type);
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
					sparse);
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return 0;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static const struct mdev_parent_ops intel_vgpu_ops = {
	.supported_type_groups	= intel_vgpu_type_groups,
	.mdev_attr_groups	= intel_vgpu_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open			= intel_vgpu_open,
	.release		= intel_vgpu_release,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	mdev_unregister_device(dev);
}

static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
					(void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}

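/*
 * Translate a guest frame number to a host iova usable for GPU DMA:
 * return the cached mapping if one exists, otherwise pin the guest page
 * through VFIO, DMA-map it, and remember the result in the per-vGPU cache.
 */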
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long iova, pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	struct intel_vgpu *vgpu;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;
	iova = gvt_cache_find(info->vgpu, gfn);
	if (iova != INTEL_GVT_INVALID_ADDR)
		return iova;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}
	/* transfer to host iova for GFX to use DMA */
	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
	if (rc) {
		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
		vfio_unpin_pages(dev, &gfn, 1);
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, iova);
	return iova;
}

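/*
 * Read from or write to guest physical memory via KVM. When called from a
 * kernel thread (current->mm == NULL), temporarily adopt the guest's mm
 * with use_mm() so kvm_read_guest()/kvm_write_guest() can resolve the gpa.
 */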
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");