Merge tag 'vfio-v4.16-rc1' of git://github.com/awilliam/linux-vfio
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Feb 2018 21:18:25 +0000 (13:18 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Feb 2018 21:18:25 +0000 (13:18 -0800)
Pull VFIO updates from Alex Williamson:

 - Mask INTx from user if pdev->irq is zero (Alexey Kardashevskiy)

 - Capability helper cleanup (Alex Williamson)

 - Allow mmaps overlapping MSI-X vector table with region capability
   exposing this feature (Alexey Kardashevskiy)

 - mdev static cleanups (Xiongwei Song)

* tag 'vfio-v4.16-rc1' of git://github.com/awilliam/linux-vfio:
  vfio: mdev: make a couple of functions and structure vfio_mdev_driver static
  vfio-pci: Allow mapping MSIX BAR
  vfio: Simplify capability helper
  vfio-pci: Mask INTx if a device is not capable of enabling it

drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/vfio/mdev/vfio_mdev.c
drivers/vfio/pci/vfio_pci.c
drivers/vfio/vfio.c
include/linux/vfio.h
include/uapi/linux/vfio.h

index 96060920a6fea2d9eead2134f313dabd3a003b09..0a7d084da1a2759242be0f7144ad77bc5967df0d 100644 (file)
@@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                        if (!sparse)
                                return -ENOMEM;
 
+                       sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+                       sparse->header.version = 1;
                        sparse->nr_areas = nr_areas;
                        cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
                        sparse->areas[0].offset =
@@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                        break;
                default:
                        {
-                               struct vfio_region_info_cap_type cap_type;
+                               struct vfio_region_info_cap_type cap_type = {
+                                       .header.id = VFIO_REGION_INFO_CAP_TYPE,
+                                       .header.version = 1 };
 
                                if (info.index >= VFIO_PCI_NUM_REGIONS +
                                                vgpu->vdev.num_regions)
@@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                                cap_type.subtype = vgpu->vdev.region[i].subtype;
 
                                ret = vfio_info_add_capability(&caps,
-                                               VFIO_REGION_INFO_CAP_TYPE,
-                                               &cap_type);
+                                                       &cap_type.header,
+                                                       sizeof(cap_type));
                                if (ret)
                                        return ret;
                        }
@@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
                        switch (cap_type_id) {
                        case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
                                ret = vfio_info_add_capability(&caps,
-                                       VFIO_REGION_INFO_CAP_SPARSE_MMAP,
-                                       sparse);
+                                       &sparse->header, sizeof(*sparse) +
+                                       (sparse->nr_areas *
+                                               sizeof(*sparse->areas)));
                                kfree(sparse);
                                if (ret)
                                        return ret;
index fa848a701b8b4ed921ae56248eac3ef0fdc7d5a3..d230620fe02d10c3b7c1030131f2286152bbf170 100644 (file)
@@ -111,19 +111,19 @@ static const struct vfio_device_ops vfio_mdev_dev_ops = {
        .mmap           = vfio_mdev_mmap,
 };
 
-int vfio_mdev_probe(struct device *dev)
+static int vfio_mdev_probe(struct device *dev)
 {
        struct mdev_device *mdev = to_mdev_device(dev);
 
        return vfio_add_group_dev(dev, &vfio_mdev_dev_ops, mdev);
 }
 
-void vfio_mdev_remove(struct device *dev)
+static void vfio_mdev_remove(struct device *dev)
 {
        vfio_del_group_dev(dev);
 }
 
-struct mdev_driver vfio_mdev_driver = {
+static struct mdev_driver vfio_mdev_driver = {
        .name   = "vfio_mdev",
        .probe  = vfio_mdev_probe,
        .remove = vfio_mdev_remove,
index f041b1a6cf665e6410917d3608de5fe0ac557476..b0f759476900d1520565810a4ead4cc2cc3a1ac2 100644 (file)
@@ -207,6 +207,9 @@ static bool vfio_pci_nointx(struct pci_dev *pdev)
                }
        }
 
+       if (!pdev->irq)
+               return true;
+
        return false;
 }
 
@@ -562,46 +565,15 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
        return walk.ret;
 }
 
-static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
-                               struct vfio_info_cap *caps)
+static int msix_mmappable_cap(struct vfio_pci_device *vdev,
+                             struct vfio_info_cap *caps)
 {
-       struct vfio_region_info_cap_sparse_mmap *sparse;
-       size_t end, size;
-       int nr_areas = 2, i = 0, ret;
-
-       end = pci_resource_len(vdev->pdev, vdev->msix_bar);
-
-       /* If MSI-X table is aligned to the start or end, only one area */
-       if (((vdev->msix_offset & PAGE_MASK) == 0) ||
-           (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
-               nr_areas = 1;
-
-       size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
-
-       sparse = kzalloc(size, GFP_KERNEL);
-       if (!sparse)
-               return -ENOMEM;
-
-       sparse->nr_areas = nr_areas;
-
-       if (vdev->msix_offset & PAGE_MASK) {
-               sparse->areas[i].offset = 0;
-               sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
-               i++;
-       }
-
-       if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
-               sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
-                                                    vdev->msix_size);
-               sparse->areas[i].size = end - sparse->areas[i].offset;
-               i++;
-       }
-
-       ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
-                                      sparse);
-       kfree(sparse);
+       struct vfio_info_cap_header header = {
+               .id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
+               .version = 1
+       };
 
-       return ret;
+       return vfio_info_add_capability(caps, &header, sizeof(header));
 }
 
 int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
@@ -692,7 +664,7 @@ static long vfio_pci_ioctl(void *device_data,
                        if (vdev->bar_mmap_supported[info.index]) {
                                info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
                                if (info.index == vdev->msix_bar) {
-                                       ret = msix_sparse_mmap_cap(vdev, &caps);
+                                       ret = msix_mmappable_cap(vdev, &caps);
                                        if (ret)
                                                return ret;
                                }
@@ -741,7 +713,9 @@ static long vfio_pci_ioctl(void *device_data,
                        break;
                default:
                {
-                       struct vfio_region_info_cap_type cap_type;
+                       struct vfio_region_info_cap_type cap_type = {
+                                       .header.id = VFIO_REGION_INFO_CAP_TYPE,
+                                       .header.version = 1 };
 
                        if (info.index >=
                            VFIO_PCI_NUM_REGIONS + vdev->num_regions)
@@ -756,9 +730,8 @@ static long vfio_pci_ioctl(void *device_data,
                        cap_type.type = vdev->region[i].type;
                        cap_type.subtype = vdev->region[i].subtype;
 
-                       ret = vfio_info_add_capability(&caps,
-                                                     VFIO_REGION_INFO_CAP_TYPE,
-                                                     &cap_type);
+                       ret = vfio_info_add_capability(&caps, &cap_type.header,
+                                                      sizeof(cap_type));
                        if (ret)
                                return ret;
 
@@ -1122,22 +1095,6 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
        if (req_start + req_len > phys_len)
                return -EINVAL;
 
-       if (index == vdev->msix_bar) {
-               /*
-                * Disallow mmaps overlapping the MSI-X table; users don't
-                * get to touch this directly.  We could find somewhere
-                * else to map the overlap, but page granularity is only
-                * a recommendation, not a requirement, so the user needs
-                * to know which bits are real.  Requiring them to mmap
-                * around the table makes that clear.
-                */
-
-               /* If neither entirely above nor below, then it overlaps */
-               if (!(req_start >= vdev->msix_offset + vdev->msix_size ||
-                     req_start + req_len <= vdev->msix_offset))
-                       return -EINVAL;
-       }
-
        /*
         * Even though we don't make use of the barmap for the mmap,
         * we need to request the region and the barmap tracks that.
index 2bc3705a99bd2f1a670e96c9f1175795b961c922..721f97f8dac1f2c979e846dcd89aa6878e195b59 100644 (file)
@@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 }
 EXPORT_SYMBOL(vfio_info_cap_shift);
 
-static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+int vfio_info_add_capability(struct vfio_info_cap *caps,
+                            struct vfio_info_cap_header *cap, size_t size)
 {
        struct vfio_info_cap_header *header;
-       struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
-       size_t size;
 
-       size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
-       header = vfio_info_cap_add(caps, size,
-                                  VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+       header = vfio_info_cap_add(caps, size, cap->id, cap->version);
        if (IS_ERR(header))
                return PTR_ERR(header);
 
-       sparse_cap = container_of(header,
-                       struct vfio_region_info_cap_sparse_mmap, header);
-       sparse_cap->nr_areas = sparse->nr_areas;
-       memcpy(sparse_cap->areas, sparse->areas,
-              sparse->nr_areas * sizeof(*sparse->areas));
-       return 0;
-}
-
-static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
-{
-       struct vfio_info_cap_header *header;
-       struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+       memcpy(header + 1, cap + 1, size - sizeof(*header));
 
-       header = vfio_info_cap_add(caps, sizeof(*cap),
-                                  VFIO_REGION_INFO_CAP_TYPE, 1);
-       if (IS_ERR(header))
-               return PTR_ERR(header);
-
-       type_cap = container_of(header, struct vfio_region_info_cap_type,
-                               header);
-       type_cap->type = cap->type;
-       type_cap->subtype = cap->subtype;
        return 0;
 }
-
-int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
-                            void *cap_type)
-{
-       int ret = -EINVAL;
-
-       if (!cap_type)
-               return 0;
-
-       switch (cap_type_id) {
-       case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
-               ret = sparse_mmap_cap(caps, cap_type);
-               break;
-
-       case VFIO_REGION_INFO_CAP_TYPE:
-               ret = region_type_cap(caps, cap_type);
-               break;
-       }
-
-       return ret;
-}
 EXPORT_SYMBOL(vfio_info_add_capability);
 
 int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
index a47b985341d12cb685a525c9a5d7179d376cb6a9..66741ab087c12b1d5be26ce4e088f895e594cf42 100644 (file)
@@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
 extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
 
 extern int vfio_info_add_capability(struct vfio_info_cap *caps,
-                                   int cap_type_id, void *cap_type);
+                                   struct vfio_info_cap_header *cap,
+                                   size_t size);
 
 extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
                                              int num_irqs, int max_irq_type,
index e3301dbd27d48521912015500604077fa6f4fac8..0d914350f7bf503290bc966b8ac5b1f55641c66b 100644 (file)
@@ -301,6 +301,16 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
 
+/*
+ * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
+ * which allows direct access to non-MSIX registers which happen to be within
+ * the same system page.
+ *
+ * Even though userspace gets direct access to the MSIX data, the existing
+ * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration.
+ */
+#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE     3
+
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  *                                 struct vfio_irq_info)