vfio/mlx5: Let firmware know upon leaving PRE_COPY back to RUNNING
authorYishai Hadas <yishaih@nvidia.com>
Mon, 5 Feb 2024 12:48:28 +0000 (14:48 +0200)
committerAlex Williamson <alex.williamson@redhat.com>
Thu, 22 Feb 2024 19:17:32 +0000 (12:17 -0700)
Let firmware know upon leaving PRE_COPY back to RUNNING, due to some
error on the target or a migration cancellation.

This will let firmware clean up its internal resources that were turned
on upon PRE_COPY.

The flow is based on the device specification in this area.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Link: https://lore.kernel.org/r/20240205124828.232701-6-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/vfio/pci/mlx5/cmd.c
drivers/vfio/pci/mlx5/cmd.h
drivers/vfio/pci/mlx5/main.c

index 6800e4ffe9ee60e2367a60bbeb35364d3687eefe..c54bcd5d09172705b6d0112a61f42b42e38f2054 100644 (file)
@@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
                ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
                if (ret)
                        return ret;
-               if (mvdev->saving_migf->state ==
-                   MLX5_MIGF_STATE_PRE_COPY_ERROR) {
+               /* Upon cleanup, ignore previous pre_copy error state */
+               if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
+                   !(query_flags & MLX5VF_QUERY_CLEANUP)) {
                        /*
                         * In case we had a PRE_COPY error, only query full
                         * image for final image
@@ -200,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
        /* Must be done outside the lock to let it progress */
        set_tracker_error(mvdev);
        mutex_lock(&mvdev->state_mutex);
-       mlx5vf_disable_fds(mvdev);
+       mlx5vf_disable_fds(mvdev, NULL);
        _mlx5vf_free_page_tracker_resources(mvdev);
        mlx5vf_state_mutex_unlock(mvdev);
 }
@@ -639,6 +640,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
        u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
        struct mlx5_vhca_data_buffer *header_buf = NULL;
        struct mlx5vf_async_data *async_data;
+       bool pre_copy_cleanup = false;
        int err;
 
        lockdep_assert_held(&mvdev->state_mutex);
@@ -649,6 +651,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
        if (err)
                return err;
 
+       if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+            migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
+               pre_copy_cleanup = true;
+
        if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
                /*
                 * In case we had a PRE_COPY error, SAVE is triggered only for
@@ -667,7 +673,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 
        async_data = &migf->async_data;
        async_data->buf = buf;
-       async_data->stop_copy_chunk = !track;
+       async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
        async_data->out = kvzalloc(out_size, GFP_KERNEL);
        if (!async_data->out) {
                err = -ENOMEM;
index 0d6a2db3d8015cf2e91b21add9890987cb0e04cd..707393df36c4b68aca854b77d3ae09f2b92a47f4 100644 (file)
@@ -197,6 +197,7 @@ struct mlx5vf_pci_core_device {
 enum {
        MLX5VF_QUERY_INC = (1UL << 0),
        MLX5VF_QUERY_FINAL = (1UL << 1),
+       MLX5VF_QUERY_CLEANUP = (1UL << 2),
 };
 
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
@@ -232,7 +233,8 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
 struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
                                       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
-void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
+                       enum mlx5_vf_migf_state *last_save_state);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
 void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
                                   u8 chunk_num, size_t next_required_umem_size);
index fe09a8c8af95e8dedac6e08a4fba74379d1c4b5d..3982fcf60cf28b272d2e9daba00511c82809b5c2 100644 (file)
@@ -1146,7 +1146,8 @@ end:
        return ERR_PTR(ret);
 }
 
-void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
+                       enum mlx5_vf_migf_state *last_save_state)
 {
        if (mvdev->resuming_migf) {
                mlx5vf_disable_fd(mvdev->resuming_migf);
@@ -1157,6 +1158,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
        if (mvdev->saving_migf) {
                mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
                cancel_work_sync(&mvdev->saving_migf->async_data.work);
+               if (last_save_state)
+                       *last_save_state = mvdev->saving_migf->state;
                mlx5vf_disable_fd(mvdev->saving_migf);
                wake_up_interruptible(&mvdev->saving_migf->poll_wait);
                mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
@@ -1217,12 +1220,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
                return migf->filp;
        }
 
-       if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
-           (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+       if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+               mlx5vf_disable_fds(mvdev, NULL);
+               return NULL;
+       }
+
+       if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
            (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
             new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
-               mlx5vf_disable_fds(mvdev);
-               return NULL;
+               struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
+               struct mlx5_vhca_data_buffer *buf;
+               enum mlx5_vf_migf_state state;
+               size_t size;
+
+               ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL,
+                                       MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
+               if (ret)
+                       return ERR_PTR(ret);
+               buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE);
+               if (IS_ERR(buf))
+                       return ERR_CAST(buf);
+               /* pre_copy cleanup */
+               ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false);
+               if (ret) {
+                       mlx5vf_put_data_buffer(buf);
+                       return ERR_PTR(ret);
+               }
+               mlx5vf_disable_fds(mvdev, &state);
+               return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO);
        }
 
        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
@@ -1244,7 +1269,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
                        if (ret)
                                return ERR_PTR(ret);
                }
-               mlx5vf_disable_fds(mvdev);
+               mlx5vf_disable_fds(mvdev, NULL);
                return NULL;
        }
 
@@ -1289,7 +1314,7 @@ again:
                mvdev->deferred_reset = false;
                spin_unlock(&mvdev->reset_lock);
                mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
-               mlx5vf_disable_fds(mvdev);
+               mlx5vf_disable_fds(mvdev, NULL);
                goto again;
        }
        mutex_unlock(&mvdev->state_mutex);