vfio/mlx5: Enforce PRE_COPY support
authorYishai Hadas <yishaih@nvidia.com>
Wed, 6 Mar 2024 10:56:24 +0000 (12:56 +0200)
committerAlex Williamson <alex.williamson@redhat.com>
Mon, 11 Mar 2024 18:02:59 +0000 (12:02 -0600)
Enable live migration only once the firmware supports PRE_COPY.

PRE_COPY has been supported by the firmware for a long time already [1]
and is required to achieve a low downtime upon live migration.

This lets us clean up some old code that is no longer applicable now
that PRE_COPY is fully supported by the firmware.

[1] The minimum firmware version that supports PRE_COPY is 28.36.1010,
it was released in January 2023.

No firmware without PRE_COPY support was ever made available to users.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20240306105624.114830-1-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/vfio/pci/mlx5/cmd.c
drivers/vfio/pci/mlx5/cmd.h
drivers/vfio/pci/mlx5/main.c

index c54bcd5d09172705b6d0112a61f42b42e38f2054..41a4b0cf429756b6e72f476ba794ad6b3baaf65e 100644 (file)
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
        if (!MLX5_CAP_GEN(mvdev->mdev, migration))
                goto end;
 
+       if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+             MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+               goto end;
+
        mvdev->vf_id = pci_iov_vf_id(pdev);
        if (mvdev->vf_id < 0)
                goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
        mvdev->migrate_cap = 1;
        mvdev->core_device.vdev.migration_flags =
                VFIO_MIGRATION_STOP_COPY |
-               VFIO_MIGRATION_P2P;
+               VFIO_MIGRATION_P2P |
+               VFIO_MIGRATION_PRE_COPY;
+
        mvdev->core_device.vdev.mig_ops = mig_ops;
        init_completion(&mvdev->tracker_comp);
        if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
                mvdev->core_device.vdev.log_ops = log_ops;
 
-       if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
-           MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
-               mvdev->core_device.vdev.migration_flags |=
-                       VFIO_MIGRATION_PRE_COPY;
-
        if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
                mvdev->chunk_mode = 1;
 
@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
        kfree(buf);
 }
 
+static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+                                     unsigned int npages)
+{
+       unsigned int to_alloc = npages;
+       struct page **page_list;
+       unsigned long filled;
+       unsigned int to_fill;
+       int ret;
+
+       to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+       page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
+       if (!page_list)
+               return -ENOMEM;
+
+       do {
+               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+                                               page_list);
+               if (!filled) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               to_alloc -= filled;
+               ret = sg_alloc_append_table_from_pages(
+                       &buf->table, page_list, filled, 0,
+                       filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+                       GFP_KERNEL_ACCOUNT);
+
+               if (ret)
+                       goto err;
+               buf->allocated_length += filled * PAGE_SIZE;
+               /* clean input for another bulk allocation */
+               memset(page_list, 0, filled * sizeof(*page_list));
+               to_fill = min_t(unsigned int, to_alloc,
+                               PAGE_SIZE / sizeof(*page_list));
+       } while (to_alloc > 0);
+
+       kvfree(page_list);
+       return 0;
+
+err:
+       kvfree(page_list);
+       return ret;
+}
+
 struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
                         size_t length,
@@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
                goto err_out;
        }
 
-       if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-               if (async_data->stop_copy_chunk) {
-                       u8 header_idx = buf->stop_copy_chunk_num ?
-                               buf->stop_copy_chunk_num - 1 : 0;
+       if (async_data->stop_copy_chunk) {
+               u8 header_idx = buf->stop_copy_chunk_num ?
+                       buf->stop_copy_chunk_num - 1 : 0;
 
-                       header_buf = migf->buf_header[header_idx];
-                       migf->buf_header[header_idx] = NULL;
-               }
+               header_buf = migf->buf_header[header_idx];
+               migf->buf_header[header_idx] = NULL;
+       }
 
-               if (!header_buf) {
-                       header_buf = mlx5vf_get_data_buffer(migf,
-                               sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-                       if (IS_ERR(header_buf)) {
-                               err = PTR_ERR(header_buf);
-                               goto err_free;
-                       }
+       if (!header_buf) {
+               header_buf = mlx5vf_get_data_buffer(migf,
+                       sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+               if (IS_ERR(header_buf)) {
+                       err = PTR_ERR(header_buf);
+                       goto err_free;
                }
        }
 
index 707393df36c4b68aca854b77d3ae09f2b92a47f4..df421dc6de04852c07cbabbf2e8bb78d3669aaa1 100644 (file)
@@ -13,9 +13,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
-#define MLX5VF_PRE_COPY_SUPP(mvdev) \
-       ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
-
 enum mlx5_vf_migf_state {
        MLX5_MIGF_STATE_ERROR = 1,
        MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
 };
 
 enum mlx5_vf_load_state {
-       MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
        MLX5_VF_LOAD_STATE_READ_HEADER,
        MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
        MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
                       size_t length, enum dma_data_direction dma_dir);
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-                              unsigned int npages);
 struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
                                       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
index 3982fcf60cf28b272d2e9daba00511c82809b5c2..61d9b0f9146d1b23c38194bc95aac4c533ed2fc3 100644 (file)
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
        return NULL;
 }
 
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-                              unsigned int npages)
-{
-       unsigned int to_alloc = npages;
-       struct page **page_list;
-       unsigned long filled;
-       unsigned int to_fill;
-       int ret;
-
-       to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
-       page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
-       if (!page_list)
-               return -ENOMEM;
-
-       do {
-               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
-                                               page_list);
-               if (!filled) {
-                       ret = -ENOMEM;
-                       goto err;
-               }
-               to_alloc -= filled;
-               ret = sg_alloc_append_table_from_pages(
-                       &buf->table, page_list, filled, 0,
-                       filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
-                       GFP_KERNEL_ACCOUNT);
-
-               if (ret)
-                       goto err;
-               buf->allocated_length += filled * PAGE_SIZE;
-               /* clean input for another bulk allocation */
-               memset(page_list, 0, filled * sizeof(*page_list));
-               to_fill = min_t(unsigned int, to_alloc,
-                               PAGE_SIZE / sizeof(*page_list));
-       } while (to_alloc > 0);
-
-       kvfree(page_list);
-       return 0;
-
-err:
-       kvfree(page_list);
-       return ret;
-}
-
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
        mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
        return 0;
 }
 
-static int
-mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
-                                  loff_t requested_length,
-                                  const char __user **buf, size_t *len,
-                                  loff_t *pos, ssize_t *done)
-{
-       int ret;
-
-       if (requested_length > MAX_LOAD_SIZE)
-               return -ENOMEM;
-
-       if (vhca_buf->allocated_length < requested_length) {
-               ret = mlx5vf_add_migration_pages(
-                       vhca_buf,
-                       DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
-                                    PAGE_SIZE));
-               if (ret)
-                       return ret;
-       }
-
-       while (*len) {
-               ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
-                                                   done);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
 static ssize_t
 mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
                         struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
                        migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
                        break;
                }
-               case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
-                       ret = mlx5vf_resume_read_image_no_header(vhca_buf,
-                                               requested_length,
-                                               &buf, &len, pos, &done);
-                       if (ret)
-                               goto out_unlock;
-                       break;
                case MLX5_VF_LOAD_STATE_READ_IMAGE:
                        ret = mlx5vf_resume_read_image(migf, vhca_buf,
                                                migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
        }
 
        migf->buf[0] = buf;
-       if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-               buf = mlx5vf_alloc_data_buffer(migf,
-                       sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-               if (IS_ERR(buf)) {
-                       ret = PTR_ERR(buf);
-                       goto out_buf;
-               }
-
-               migf->buf_header[0] = buf;
-               migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
-       } else {
-               /* Initial state will be to read the image */
-               migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+       buf = mlx5vf_alloc_data_buffer(migf,
+               sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto out_buf;
        }
 
+       migf->buf_header[0] = buf;
+       migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        INIT_LIST_HEAD(&migf->buf_list);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
        }
 
        if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-               if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
-                       ret = mlx5vf_cmd_load_vhca_state(mvdev,
-                                                        mvdev->resuming_migf,
-                                                        mvdev->resuming_migf->buf[0]);
-                       if (ret)
-                               return ERR_PTR(ret);
-               }
                mlx5vf_disable_fds(mvdev, NULL);
                return NULL;
        }