drm/amdgpu: move the drm client creation behind drm device registration
author Le Ma <le.ma@amd.com>
Thu, 25 Jan 2024 04:00:34 +0000 (12:00 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 29 Jan 2024 20:35:13 +0000 (15:35 -0500)
This patch eliminates the interrupt warning below:

  "[drm] Fence fallback timer expired on ring sdma0.0".

An early VM PT (page table) clearing job is sent to SDMA before interrupts
are enabled. Relocating the drm client creation to after drm_dev_register
also looks like a more proper flow.

v2: wrap the drm client creation

Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles")
Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index dfb93664e866165bfc3b8139c5fe450a5a86f1b3..190039f14c30c676f080e6c55a8c8637901a74fd 100644 (file)
@@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 static const struct drm_client_funcs kfd_client_funcs = {
        .unregister     = drm_client_release,
 };
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) /* init and register the "kfd" DRM client; returns 0 or the drm_client_init() error */
+{
+       int ret;
+
+       if (!adev->kfd.init_complete) /* KFD init not complete: skip client creation */
+               return 0; /* skipping is reported as success, not as an error */
+
+       ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+                             &kfd_client_funcs);
+       if (ret) {
+               dev_err(adev->dev, "Failed to init DRM client: %d\n",
+                       ret);
+               return ret; /* hand the init failure back to the caller */
+       }
+
+       drm_client_register(&adev->kfd.client); /* reached only when drm_client_init() returned 0 */
+
+       return 0;
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
        int i;
        int last_valid_bit;
-       int ret;
 
        amdgpu_amdkfd_gpuvm_init_mem_limits();
 
@@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
                        .enable_mes = adev->enable_mes,
                };
 
-               ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
-               if (ret) {
-                       dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
-                       return;
-               }
-
                /* this is going to have a few of the MSBs set that we need to
                 * clear
                 */
@@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
                adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
                                                        &gpu_resources);
-               if (adev->kfd.init_complete)
-                       drm_client_register(&adev->kfd.client);
-               else
-                       drm_client_release(&adev->kfd.client);
 
                amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
index 50d3e0149032a251b41bbd6e44188b61b0e8af28..298fc52a35bcf4d98c9a594ef8a0be07366edd6b 100644 (file)
@@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
                                struct mm_struct *mm,
                                struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
 #if defined(CONFIG_DEBUG_FS)
 int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
 #endif
index 475bd59c9ac2b31e0e85fcf1518906bb90d6ef12..91d5d94350671c053a0befd7012e00629d423038 100644 (file)
@@ -2255,6 +2255,10 @@ retry_init:
        if (ret)
                goto err_pci;
 
+       ret = amdgpu_amdkfd_drm_client_create(adev);
+       if (ret)
+               goto err_pci;
+
        /*
         * 1. don't init fbdev on hw without DCE
         * 2. don't init fbdev if there are no connectors