vfio/mlx5: Implement vfio_pci driver for mlx5 devices
author Yishai Hadas <yishaih@nvidia.com>
Thu, 24 Feb 2022 14:20:22 +0000 (16:20 +0200)
committer Leon Romanovsky <leonro@nvidia.com>
Thu, 3 Mar 2022 11:01:19 +0000 (13:01 +0200)
This patch adds a vfio_pci driver for mlx5 devices.

It uses vfio_pci_core to register to the VFIO subsystem and then
implements the mlx5 specific logic in the migration area.

The migration implementation follows the definition from uapi/vfio.h and
uses the mlx5 VF->PF command channel to achieve it.

This patch implements the suspend/resume flows.

Link: https://lore.kernel.org/all/20220224142024.147653-14-yishaih@nvidia.com
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
MAINTAINERS
drivers/vfio/pci/Kconfig
drivers/vfio/pci/Makefile
drivers/vfio/pci/mlx5/Kconfig [new file with mode: 0644]
drivers/vfio/pci/mlx5/Makefile [new file with mode: 0644]
drivers/vfio/pci/mlx5/cmd.h
drivers/vfio/pci/mlx5/main.c [new file with mode: 0644]

index 777cd6fa2b3d69fcf90c6fa07dff18fe16286f32..4322b53218916502cedd341dd7e3b83525cfd448 100644 (file)
@@ -20320,6 +20320,12 @@ L:     kvm@vger.kernel.org
 S:     Maintained
 F:     drivers/vfio/platform/
 
+VFIO MLX5 PCI DRIVER
+M:     Yishai Hadas <yishaih@nvidia.com>
+L:     kvm@vger.kernel.org
+S:     Maintained
+F:     drivers/vfio/pci/mlx5/
+
 VGA_SWITCHEROO
 R:     Lukas Wunner <lukas@wunner.de>
 S:     Maintained
index 860424ccda1bf11f1662a4629b068c353db42964..187b9c259944a7638d036af0d624c746aae5090b 100644 (file)
@@ -43,4 +43,7 @@ config VFIO_PCI_IGD
 
          To enable Intel IGD assignment through vfio-pci, say Y.
 endif
+
+source "drivers/vfio/pci/mlx5/Kconfig"
+
 endif
index 349d68d242b4253c5dae31efc9a8c6343c4ca434..ed9d6f2e05553a1420679afcc4e895924d168ee3 100644 (file)
@@ -7,3 +7,5 @@ obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
 vfio-pci-y := vfio_pci.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
+
+obj-$(CONFIG_MLX5_VFIO_PCI)           += mlx5/
diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig
new file mode 100644 (file)
index 0000000..29ba9c5
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MLX5_VFIO_PCI
+       tristate "VFIO support for MLX5 PCI devices"
+       depends on MLX5_CORE
+       depends on VFIO_PCI_CORE
+       help
+         This provides migration support for MLX5 devices using the VFIO
+         framework.
+
+         If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/mlx5/Makefile b/drivers/vfio/pci/mlx5/Makefile
new file mode 100644 (file)
index 0000000..689627d
--- /dev/null
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o
+mlx5-vfio-pci-y := main.o cmd.o
+
index 69a1481ed953db9e53d763ca6c8188bdf81328bf..1392a11a9cc0f5a332a0c371ed2b6bc977d99c66 100644 (file)
@@ -12,6 +12,7 @@
 struct mlx5_vf_migration_file {
        struct file *filp;
        struct mutex lock;
+       bool disabled;
 
        struct sg_append_table table;
        size_t total_length;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
new file mode 100644 (file)
index 0000000..ae1e40f
--- /dev/null
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/sched/mm.h>
+#include <linux/vfio_pci_core.h>
+#include <linux/anon_inodes.h>
+
+#include "cmd.h"
+
+/* Arbitrary to prevent userspace from consuming endless memory */
+#define MAX_MIGRATION_SIZE (512*1024*1024)
+
+/*
+ * Per-device state of the mlx5 vfio-pci driver: the generic vfio-pci core
+ * device plus the fields used by the migration flow.
+ */
+struct mlx5vf_pci_core_device {
+       struct vfio_pci_core_device core_device;
+       /* filled in by mlx5vf_cmd_get_vhca_id() when the device is opened */
+       u16 vhca_id;
+       /* set at probe time when the mlx5 core device has the migration cap */
+       u8 migrate_cap:1;
+       /* protect migration state */
+       struct mutex state_mutex;
+       /* current migration state; advanced one arc at a time */
+       enum vfio_device_mig_state mig_state;
+       /* file created on the STOP -> RESUMING arc, NULL when not resuming */
+       struct mlx5_vf_migration_file *resuming_migf;
+       /* file created on the STOP -> STOP_COPY arc, NULL when not saving */
+       struct mlx5_vf_migration_file *saving_migf;
+};
+
+/*
+ * Return the page of @migf's scatter table that holds byte @offset, or NULL
+ * when @offset lies past the last table entry.
+ *
+ * The sg entry reached by the previous lookup, its index and its starting
+ * offset are cached in @migf so that a forward-moving sequence of lookups
+ * does not rescan the table from the beginning.
+ */
+static struct page *
+mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
+                         unsigned long offset)
+{
+       unsigned long cur_offset = 0;
+       struct scatterlist *sg;
+       unsigned int i;
+
+       /*
+        * All accesses are sequential: restart from the head of the table
+        * only when the request moves backwards or nothing is cached yet.
+        */
+       if (offset < migf->last_offset || !migf->last_offset_sg) {
+               migf->last_offset = 0;
+               migf->last_offset_sg = migf->table.sgt.sgl;
+               migf->sg_last_entry = 0;
+       }
+
+       cur_offset = migf->last_offset;
+
+       /* Walk only the entries from the cached one to the end of the table */
+       for_each_sg(migf->last_offset_sg, sg,
+                       migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
+               if (offset < sg->length + cur_offset) {
+                       /* Remember this entry for the next lookup */
+                       migf->last_offset_sg = sg;
+                       migf->sg_last_entry += i;
+                       migf->last_offset = cur_offset;
+                       return nth_page(sg_page(sg),
+                                       (offset - cur_offset) / PAGE_SIZE);
+               }
+               cur_offset += sg->length;
+       }
+       return NULL;
+}
+
+/*
+ * Append @npages freshly allocated pages to @migf's scatter table.
+ *
+ * Pages are obtained in batches of at most one page worth of page pointers
+ * and appended batch by batch; migf->allocated_length grows as each batch is
+ * added.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation yields nothing, or the
+ * error reported by sg_alloc_append_table_from_pages().
+ */
+static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
+                                     unsigned int npages)
+{
+       unsigned int remaining = npages;
+       struct page **page_list;
+       unsigned int batch;
+       unsigned long got;
+       int ret;
+
+       batch = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+       page_list = kvzalloc(batch * sizeof(*page_list), GFP_KERNEL);
+       if (!page_list)
+               return -ENOMEM;
+
+       do {
+               got = alloc_pages_bulk_array(GFP_KERNEL, batch, page_list);
+               if (!got) {
+                       ret = -ENOMEM;
+                       break;
+               }
+               remaining -= got;
+
+               ret = sg_alloc_append_table_from_pages(
+                       &migf->table, page_list, got, 0,
+                       got << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+                       GFP_KERNEL);
+               if (ret)
+                       break;
+               migf->allocated_length += got * PAGE_SIZE;
+
+               /* clean input for another bulk allocation */
+               memset(page_list, 0, got * sizeof(*page_list));
+               batch = min_t(unsigned int, remaining,
+                             PAGE_SIZE / sizeof(*page_list));
+       } while (remaining > 0);
+
+       /* ret is 0 here when the loop ran to completion */
+       kvfree(page_list);
+       return ret;
+}
+
+/*
+ * Free every page held by @migf, release its scatter table and mark the file
+ * as disabled, all under migf->lock.
+ */
+static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
+{
+       struct sg_page_iter piter;
+
+       mutex_lock(&migf->lock);
+
+       /* Undo alloc_pages_bulk_array() */
+       for_each_sgtable_page(&migf->table.sgt, &piter, 0)
+               __free_page(sg_page_iter_page(&piter));
+       sg_free_append_table(&migf->table);
+
+       /* Reset the bookkeeping so it describes an empty, unusable file */
+       migf->allocated_length = 0;
+       migf->total_length = 0;
+       migf->filp->f_pos = 0;
+       migf->disabled = true;
+
+       mutex_unlock(&migf->lock);
+}
+
+/*
+ * ->release() handler shared by the save and resume files: drop the data
+ * held by the migration file, then free the migration file object itself.
+ * Always returns 0.
+ */
+static int mlx5vf_release_file(struct inode *inode, struct file *filp)
+{
+       struct mlx5_vf_migration_file *migf;
+
+       migf = filp->private_data;
+       mlx5vf_disable_fd(migf);
+       mutex_destroy(&migf->lock);
+       kfree(migf);
+
+       return 0;
+}
+
+/*
+ * read() handler of the saving migration file: copy saved device state to
+ * @buf, starting at the current file position.
+ *
+ * Returns the number of bytes copied, which is less than @len when the saved
+ * data ends first, or a negative errno:
+ *   -ESPIPE  called with a position pointer
+ *   -EINVAL  the file position is beyond the saved data, or no page backs
+ *            the position and nothing was copied yet
+ *   -ENODEV  the file has been disabled
+ *   -EFAULT  @buf could not be written; bytes copied so far are not reported
+ */
+static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
+                              loff_t *pos)
+{
+       struct mlx5_vf_migration_file *migf = filp->private_data;
+       ssize_t done = 0;
+
+       /* Only access through the file's own position is supported */
+       if (pos)
+               return -ESPIPE;
+       pos = &filp->f_pos;
+
+       mutex_lock(&migf->lock);
+       if (*pos > migf->total_length) {
+               done = -EINVAL;
+               goto out_unlock;
+       }
+       if (migf->disabled) {
+               done = -ENODEV;
+               goto out_unlock;
+       }
+
+       /* Never read past the end of the saved data */
+       len = min_t(size_t, migf->total_length - *pos, len);
+       while (len) {
+               size_t page_offset;
+               struct page *page;
+               size_t page_len;
+               u8 *from_buff;
+               int ret;
+
+               /* Look the page up by the page-aligned part of the position */
+               page_offset = (*pos) % PAGE_SIZE;
+               page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+               if (!page) {
+                       /* Report what was copied so far, if anything */
+                       if (done == 0)
+                               done = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* Copy at most up to the end of this page */
+               page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
+               from_buff = kmap_local_page(page);
+               ret = copy_to_user(buf, from_buff + page_offset, page_len);
+               kunmap_local(from_buff);
+               if (ret) {
+                       done = -EFAULT;
+                       goto out_unlock;
+               }
+               *pos += page_len;
+               len -= page_len;
+               done += page_len;
+               buf += page_len;
+       }
+
+out_unlock:
+       mutex_unlock(&migf->lock);
+       return done;
+}
+
+/* File operations of the read-only file that exposes the saved device state */
+static const struct file_operations mlx5vf_save_fops = {
+       .owner = THIS_MODULE,
+       .read = mlx5vf_save_read,
+       .release = mlx5vf_release_file,
+       .llseek = no_llseek,
+};
+
+/*
+ * Create the saving migration file for @mvdev and fill it with the device
+ * state: query the state size, allocate enough pages to hold it and issue
+ * the save command.
+ *
+ * Returns the new migration file, or an ERR_PTR() on failure.
+ */
+static struct mlx5_vf_migration_file *
+mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
+{
+       struct pci_dev *pdev = mvdev->core_device.pdev;
+       struct mlx5_vf_migration_file *migf;
+       int ret;
+
+       migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+       if (!migf)
+               return ERR_PTR(-ENOMEM);
+
+       migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
+                                       O_RDONLY);
+       if (IS_ERR(migf->filp)) {
+               ret = PTR_ERR(migf->filp);
+               kfree(migf);
+               return ERR_PTR(ret);
+       }
+
+       stream_open(migf->filp->f_inode, migf->filp);
+       mutex_init(&migf->lock);
+
+       /* Each step runs only when all previous ones succeeded */
+       ret = mlx5vf_cmd_query_vhca_migration_state(pdev, mvdev->vhca_id,
+                                                   &migf->total_length);
+       if (!ret)
+               ret = mlx5vf_add_migration_pages(
+                       migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
+       if (!ret)
+               ret = mlx5vf_cmd_save_vhca_state(pdev, mvdev->vhca_id, migf);
+       if (ret) {
+               /* migf is freed through the file's ->release() handler */
+               fput(migf->filp);
+               return ERR_PTR(ret);
+       }
+
+       return migf;
+}
+
+/*
+ * write() handler of the resuming migration file: append device state from
+ * @buf at the current file position, growing the backing pages as needed.
+ *
+ * Returns the number of bytes stored or a negative errno:
+ *   -ESPIPE  called with a position pointer
+ *   -EINVAL  negative position, position + @len overflows, or no page backs
+ *            the position and nothing was stored yet
+ *   -ENOMEM  the data would exceed MAX_MIGRATION_SIZE
+ *   -ENODEV  the file has been disabled
+ *   -EFAULT  @buf could not be read; bytes stored so far are not reported
+ * A failure of mlx5vf_add_migration_pages() is returned as is.
+ */
+static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
+                                  size_t len, loff_t *pos)
+{
+       struct mlx5_vf_migration_file *migf = filp->private_data;
+       loff_t requested_length;
+       ssize_t done = 0;
+
+       /* Only access through the file's own position is supported */
+       if (pos)
+               return -ESPIPE;
+       pos = &filp->f_pos;
+
+       if (*pos < 0 ||
+           check_add_overflow((loff_t)len, *pos, &requested_length))
+               return -EINVAL;
+
+       if (requested_length > MAX_MIGRATION_SIZE)
+               return -ENOMEM;
+
+       mutex_lock(&migf->lock);
+       if (migf->disabled) {
+               done = -ENODEV;
+               goto out_unlock;
+       }
+
+       /* Allocate only the pages missing to cover the end of this write */
+       if (migf->allocated_length < requested_length) {
+               done = mlx5vf_add_migration_pages(
+                       migf,
+                       DIV_ROUND_UP(requested_length - migf->allocated_length,
+                                    PAGE_SIZE));
+               if (done)
+                       goto out_unlock;
+       }
+
+       while (len) {
+               size_t page_offset;
+               struct page *page;
+               size_t page_len;
+               u8 *to_buff;
+               int ret;
+
+               /* Look the page up by the page-aligned part of the position */
+               page_offset = (*pos) % PAGE_SIZE;
+               page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+               if (!page) {
+                       /* Report what was stored so far, if anything */
+                       if (done == 0)
+                               done = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* Copy at most up to the end of this page */
+               page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
+               to_buff = kmap_local_page(page);
+               ret = copy_from_user(to_buff + page_offset, buf, page_len);
+               kunmap_local(to_buff);
+               if (ret) {
+                       done = -EFAULT;
+                       goto out_unlock;
+               }
+               *pos += page_len;
+               len -= page_len;
+               done += page_len;
+               buf += page_len;
+               /* total_length tracks how much state has been received */
+               migf->total_length += page_len;
+       }
+out_unlock:
+       mutex_unlock(&migf->lock);
+       return done;
+}
+
+/* File operations of the write-only file that receives the state to load */
+static const struct file_operations mlx5vf_resume_fops = {
+       .owner = THIS_MODULE,
+       .write = mlx5vf_resume_write,
+       .release = mlx5vf_release_file,
+       .llseek = no_llseek,
+};
+
+/*
+ * Create the empty, write-only migration file that userspace fills with the
+ * device state to be loaded.
+ *
+ * Returns the new migration file, or an ERR_PTR() on failure.
+ */
+static struct mlx5_vf_migration_file *
+mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
+{
+       struct mlx5_vf_migration_file *migf;
+       struct file *filp;
+
+       migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+       if (!migf)
+               return ERR_PTR(-ENOMEM);
+
+       filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
+                                 O_WRONLY);
+       if (IS_ERR(filp)) {
+               kfree(migf);
+               return ERR_CAST(filp);
+       }
+
+       migf->filp = filp;
+       stream_open(filp->f_inode, filp);
+       mutex_init(&migf->lock);
+
+       return migf;
+}
+
+/*
+ * Disable whichever migration files @mvdev currently holds and drop the
+ * driver's reference to each of them.
+ */
+static void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
+{
+       struct mlx5_vf_migration_file *migf;
+
+       migf = mvdev->resuming_migf;
+       if (migf) {
+               mlx5vf_disable_fd(migf);
+               fput(migf->filp);
+               mvdev->resuming_migf = NULL;
+       }
+
+       migf = mvdev->saving_migf;
+       if (migf) {
+               mlx5vf_disable_fd(migf);
+               fput(migf->filp);
+               mvdev->saving_migf = NULL;
+       }
+}
+
+/*
+ * Perform a single migration state transition, from mvdev->mig_state to
+ * @new. The caller holds mvdev->state_mutex and updates mvdev->mig_state
+ * itself once this function succeeds.
+ *
+ * Returns:
+ *   a file pointer  on the arcs that create a migration file
+ *                   (STOP -> STOP_COPY and STOP -> RESUMING); one reference
+ *                   is taken for the caller, a second stays with @mvdev
+ *   NULL            on every other successful arc
+ *   ERR_PTR()       when the device command fails or the arc is unknown
+ */
+static struct file *
+mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
+                                   u32 new)
+{
+       u32 cur = mvdev->mig_state;
+       int ret;
+
+       /* RUNNING_P2P -> STOP: suspend the responder side */
+       if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
+               ret = mlx5vf_cmd_suspend_vhca(
+                       mvdev->core_device.pdev, mvdev->vhca_id,
+                       MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
+               if (ret)
+                       return ERR_PTR(ret);
+               return NULL;
+       }
+
+       /* STOP -> RUNNING_P2P: resume the responder side */
+       if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+               ret = mlx5vf_cmd_resume_vhca(
+                       mvdev->core_device.pdev, mvdev->vhca_id,
+                       MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
+               if (ret)
+                       return ERR_PTR(ret);
+               return NULL;
+       }
+
+       /* RUNNING -> RUNNING_P2P: suspend the initiator side */
+       if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+               ret = mlx5vf_cmd_suspend_vhca(
+                       mvdev->core_device.pdev, mvdev->vhca_id,
+                       MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
+               if (ret)
+                       return ERR_PTR(ret);
+               return NULL;
+       }
+
+       /* RUNNING_P2P -> RUNNING: resume the initiator side */
+       if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+               ret = mlx5vf_cmd_resume_vhca(
+                       mvdev->core_device.pdev, mvdev->vhca_id,
+                       MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
+               if (ret)
+                       return ERR_PTR(ret);
+               return NULL;
+       }
+
+       /* STOP -> STOP_COPY: save the device state into a new readable file */
+       if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+               struct mlx5_vf_migration_file *migf;
+
+               migf = mlx5vf_pci_save_device_data(mvdev);
+               if (IS_ERR(migf))
+                       return ERR_CAST(migf);
+               /* Extra reference: one for mvdev, one for the caller */
+               get_file(migf->filp);
+               mvdev->saving_migf = migf;
+               return migf->filp;
+       }
+
+       /* STOP_COPY -> STOP: saving is over, retire the migration file */
+       if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+               mlx5vf_disable_fds(mvdev);
+               return NULL;
+       }
+
+       /* STOP -> RESUMING: hand out an empty writable file */
+       if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+               struct mlx5_vf_migration_file *migf;
+
+               migf = mlx5vf_pci_resume_device_data(mvdev);
+               if (IS_ERR(migf))
+                       return ERR_CAST(migf);
+               /* Extra reference: one for mvdev, one for the caller */
+               get_file(migf->filp);
+               mvdev->resuming_migf = migf;
+               return migf->filp;
+       }
+
+       /* RESUMING -> STOP: load what was written, then retire the file */
+       if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+               ret = mlx5vf_cmd_load_vhca_state(mvdev->core_device.pdev,
+                                                mvdev->vhca_id,
+                                                mvdev->resuming_migf);
+               if (ret)
+                       return ERR_PTR(ret);
+               mlx5vf_disable_fds(mvdev);
+               return NULL;
+       }
+
+       /*
+        * vfio_mig_get_next_state() does not use arcs other than the above
+        */
+       WARN_ON(true);
+       return ERR_PTR(-EINVAL);
+}
+
+/*
+ * ->migration_set_state() handler: move the device to @new_state by walking
+ * the intermediate states chosen by vfio_mig_get_next_state(), one arc at a
+ * time, under mvdev->state_mutex.
+ *
+ * Returns the result of the last step taken: NULL or a file pointer on
+ * success, or an ERR_PTR(). On error the device stays in the last state
+ * that was reached successfully.
+ */
+static struct file *
+mlx5vf_pci_set_device_state(struct vfio_device *vdev,
+                           enum vfio_device_mig_state new_state)
+{
+       struct mlx5vf_pci_core_device *mvdev = container_of(
+               vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+       enum vfio_device_mig_state next_state;
+       struct file *res = NULL;
+       int ret;
+
+       mutex_lock(&mvdev->state_mutex);
+       while (new_state != mvdev->mig_state) {
+               ret = vfio_mig_get_next_state(vdev, mvdev->mig_state,
+                                             new_state, &next_state);
+               if (ret) {
+                       res = ERR_PTR(ret);
+                       break;
+               }
+               res = mlx5vf_pci_step_device_state_locked(mvdev, next_state);
+               if (IS_ERR(res))
+                       break;
+               mvdev->mig_state = next_state;
+               /*
+                * A file may only come from the final step: one produced by
+                * an intermediate step could not be handed to the caller, so
+                * drop it and fail.
+                */
+               if (WARN_ON(res && new_state != mvdev->mig_state)) {
+                       fput(res);
+                       res = ERR_PTR(-EINVAL);
+                       break;
+               }
+       }
+       mutex_unlock(&mvdev->state_mutex);
+       return res;
+}
+
+/*
+ * ->migration_get_state() handler: store the current migration state in
+ * @curr_state, sampled under mvdev->state_mutex. Always returns 0.
+ */
+static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
+                                      enum vfio_device_mig_state *curr_state)
+{
+       struct mlx5vf_pci_core_device *mvdev;
+
+       mvdev = container_of(vdev, struct mlx5vf_pci_core_device,
+                            core_device.vdev);
+
+       mutex_lock(&mvdev->state_mutex);
+       *curr_state = mvdev->mig_state;
+       mutex_unlock(&mvdev->state_mutex);
+
+       return 0;
+}
+
+/*
+ * ->open_device() handler: enable the PCI device through the vfio-pci core
+ * and, for migration-capable devices, look up the VHCA id and start in the
+ * RUNNING migration state.
+ *
+ * Returns 0 on success or a negative errno; on failure after the core
+ * enable, the device is disabled again.
+ */
+static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
+{
+       struct mlx5vf_pci_core_device *mvdev = container_of(
+               core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+       struct vfio_pci_core_device *vdev = &mvdev->core_device;
+       int vf_id;
+       int ret;
+
+       ret = vfio_pci_core_enable(vdev);
+       if (ret)
+               return ret;
+
+       /* The migration setup is skipped for devices without the capability */
+       if (mvdev->migrate_cap) {
+               vf_id = pci_iov_vf_id(vdev->pdev);
+               if (vf_id < 0) {
+                       ret = vf_id;
+                       goto out_disable;
+               }
+
+               /*
+                * NOTE(review): vf_id + 1 presumably because function id 0
+                * denotes the PF - confirm against mlx5vf_cmd_get_vhca_id().
+                */
+               ret = mlx5vf_cmd_get_vhca_id(vdev->pdev, vf_id + 1,
+                                            &mvdev->vhca_id);
+               if (ret)
+                       goto out_disable;
+
+               mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+       }
+
+       vfio_pci_core_finish_enable(vdev);
+       return 0;
+
+out_disable:
+       vfio_pci_core_disable(vdev);
+       return ret;
+}
+
+/*
+ * ->close_device() handler: retire any migration file still held by the
+ * device before handing over to the vfio-pci core close path.
+ */
+static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
+{
+       struct mlx5vf_pci_core_device *mvdev;
+
+       mvdev = container_of(core_vdev, struct mlx5vf_pci_core_device,
+                            core_device.vdev);
+
+       mlx5vf_disable_fds(mvdev);
+       vfio_pci_core_close_device(core_vdev);
+}
+
+/*
+ * VFIO device operations: open/close and the two migration callbacks are
+ * implemented by this driver, everything else is delegated to vfio-pci core.
+ */
+static const struct vfio_device_ops mlx5vf_pci_ops = {
+       .name = "mlx5-vfio-pci",
+       .open_device = mlx5vf_pci_open_device,
+       .close_device = mlx5vf_pci_close_device,
+       .ioctl = vfio_pci_core_ioctl,
+       .device_feature = vfio_pci_core_ioctl_feature,
+       .read = vfio_pci_core_read,
+       .write = vfio_pci_core_write,
+       .mmap = vfio_pci_core_mmap,
+       .request = vfio_pci_core_request,
+       .match = vfio_pci_core_match,
+       .migration_set_state = mlx5vf_pci_set_device_state,
+       .migration_get_state = mlx5vf_pci_get_device_state,
+};
+
+/*
+ * PCI probe: allocate the per-device state, decide whether migration can be
+ * offered and register the device with the vfio-pci core.
+ *
+ * Migration is enabled only for a virtual function whose mlx5 core device
+ * is available and reports the migration capability.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int mlx5vf_pci_probe(struct pci_dev *pdev,
+                           const struct pci_device_id *id)
+{
+       struct mlx5vf_pci_core_device *mvdev;
+       struct mlx5_core_dev *mdev = NULL;
+       int ret;
+
+       mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+       if (!mvdev)
+               return -ENOMEM;
+       vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
+
+       if (pdev->is_virtfn)
+               mdev = mlx5_vf_get_core_dev(pdev);
+
+       if (mdev && MLX5_CAP_GEN(mdev, migration)) {
+               mvdev->migrate_cap = 1;
+               mvdev->core_device.vdev.migration_flags =
+                       VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
+               mutex_init(&mvdev->state_mutex);
+       }
+       /* The core device is only needed for the capability check */
+       if (mdev)
+               mlx5_vf_put_core_dev(mdev);
+
+       ret = vfio_pci_core_register_device(&mvdev->core_device);
+       if (ret) {
+               vfio_pci_core_uninit_device(&mvdev->core_device);
+               kfree(mvdev);
+               return ret;
+       }
+
+       dev_set_drvdata(&pdev->dev, mvdev);
+       return 0;
+}
+
+/*
+ * PCI remove: undo mlx5vf_pci_probe() - unregister from the vfio-pci core,
+ * tear down the core device and free the per-device state.
+ */
+static void mlx5vf_pci_remove(struct pci_dev *pdev)
+{
+       struct mlx5vf_pci_core_device *mvdev;
+
+       mvdev = dev_get_drvdata(&pdev->dev);
+
+       vfio_pci_core_unregister_device(&mvdev->core_device);
+       vfio_pci_core_uninit_device(&mvdev->core_device);
+       kfree(mvdev);
+}
+
+/* PCI devices handled by this driver; the empty entry terminates the table */
+static const struct pci_device_id mlx5vf_pci_table[] = {
+       { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_MELLANOX, 0x101e) }, /* ConnectX Family mlx5Gen Virtual Function */
+       {}
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);
+
+/* PCI driver glue: ties the ID table to the probe and remove callbacks */
+static struct pci_driver mlx5vf_pci_driver = {
+       .name = KBUILD_MODNAME,
+       .id_table = mlx5vf_pci_table,
+       .probe = mlx5vf_pci_probe,
+       .remove = mlx5vf_pci_remove,
+};
+
+/*
+ * Module init and exit do nothing beyond registering and unregistering the
+ * PCI driver, so let module_pci_driver() generate that boilerplate.
+ */
+module_pci_driver(mlx5vf_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>");
+MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
+MODULE_DESCRIPTION(
+       "MLX5 VFIO PCI - User Level meta-driver for MLX5 device family");