IB/mlx5: Fix cleanup order on unload
authorMark Bloch <markb@mellanox.com>
Wed, 14 Mar 2018 07:14:15 +0000 (09:14 +0200)
committerDoug Ledford <dledford@redhat.com>
Wed, 14 Mar 2018 20:44:02 +0000 (16:44 -0400)
On load we create private CQ/QP/PD in order to be used by UMR, we create
those resources after we register ourself as an IB device, and we destroy
them after we unregister as an IB device. This was changed by commit
16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove
stages") which moved the destruction before we unregistration. This
allowed to trigger an invalid memory access when unloading mlx5_ib while
there are open resources:

BUG: unable to handle kernel paging request at 00000001002c012c
...
Call Trace:
 mlx5_ib_post_send_wait+0x75/0x110 [mlx5_ib]
 __slab_free+0x9a/0x2d0
 delay_time_func+0x10/0x10 [mlx5_ib]
 unreg_umr.isra.15+0x4b/0x50 [mlx5_ib]
 mlx5_mr_cache_free+0x46/0x150 [mlx5_ib]
 clean_mr+0xc9/0x190 [mlx5_ib]
 dereg_mr+0xba/0xf0 [mlx5_ib]
 ib_dereg_mr+0x13/0x20 [ib_core]
 remove_commit_idr_uobject+0x16/0x70 [ib_uverbs]
 uverbs_cleanup_ucontext+0xe8/0x1a0 [ib_uverbs]
 ib_uverbs_cleanup_ucontext.isra.9+0x19/0x40 [ib_uverbs]
 ib_uverbs_remove_one+0x162/0x2e0 [ib_uverbs]
 ib_unregister_device+0xd4/0x190 [ib_core]
 __mlx5_ib_remove+0x2e/0x40 [mlx5_ib]
 mlx5_remove_device+0xf5/0x120 [mlx5_core]
 mlx5_unregister_interface+0x37/0x90 [mlx5_core]
 mlx5_ib_cleanup+0xc/0x225 [mlx5_ib]
 SyS_delete_module+0x153/0x230
 do_syscall_64+0x62/0x110
 entry_SYSCALL_64_after_hwframe+0x21/0x86
...

We restore the original behavior by breaking the UMR stage into two parts,
pre and post IB registration stages, this way we can restore the original
functionality and maintain clean separation of logic between stages.

Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages")
Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h

index 033b6af90de9eeacffba3a4f9ca7293d557eb8b5..da091de4e69d81eda73de4e3ce758830aaa1413c 100644 (file)
@@ -4860,19 +4860,19 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
        return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
-       ib_unregister_device(&dev->ib_dev);
+       destroy_umrc_res(dev);
 }
 
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 {
-       return create_umr_res(dev);
+       ib_unregister_device(&dev->ib_dev);
 }
 
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 {
-       destroy_umrc_res(dev);
+       return create_umr_res(dev);
 }
 
 static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
@@ -4982,12 +4982,15 @@ static const struct mlx5_ib_profile pf_profile = {
        STAGE_CREATE(MLX5_IB_STAGE_BFREG,
                     mlx5_ib_stage_bfrag_init,
                     mlx5_ib_stage_bfrag_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+                    NULL,
+                    mlx5_ib_stage_pre_ib_reg_umr_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
-       STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
-                    mlx5_ib_stage_umr_res_init,
-                    mlx5_ib_stage_umr_res_cleanup),
+       STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+                    mlx5_ib_stage_post_ib_reg_umr_init,
+                    NULL),
        STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
                     mlx5_ib_stage_delay_drop_init,
                     mlx5_ib_stage_delay_drop_cleanup),
index 1393851299738d6173daff863e444e9fc5346af0..a5272499b60054d3a37a84cf277a0c067ca1c6e4 100644 (file)
@@ -739,8 +739,9 @@ enum mlx5_ib_stages {
        MLX5_IB_STAGE_CONG_DEBUGFS,
        MLX5_IB_STAGE_UAR,
        MLX5_IB_STAGE_BFREG,
+       MLX5_IB_STAGE_PRE_IB_REG_UMR,
        MLX5_IB_STAGE_IB_REG,
-       MLX5_IB_STAGE_UMR_RESOURCES,
+       MLX5_IB_STAGE_POST_IB_REG_UMR,
        MLX5_IB_STAGE_DELAY_DROP,
        MLX5_IB_STAGE_CLASS_ATTR,
        MLX5_IB_STAGE_MAX,