drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs
author: Felix Kuehling <Felix.Kuehling@amd.com>
Thu, 4 Nov 2021 19:11:04 +0000 (15:11 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 5 Nov 2021 18:12:45 +0000 (14:12 -0400)
If a kfd_bo was shared (e.g. a dmabuf export), the original kfd_bo may be
freed while the amdgpu_bo still lives on. Free the kfd_bo struct in the
release_notify callback when the amdgpu_bo is freed.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-By: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 751557af09bbb678e74e89556c0d5e6674f2a91d..a15a4787c7ee7990d797bc1014ba851a4a1bb666 100644 (file)
@@ -297,7 +297,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
@@ -312,7 +312,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 }
 
 static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 }
 #endif
index a00d8acdfb4b38e834b359346118f8cc04b8f0d8..71acd577803ec3a6f9899883c2e25f9ab745c47f 100644 (file)
@@ -207,7 +207,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        u32 domain = bo->preferred_domains;
@@ -219,6 +219,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
        }
 
        unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+
+       kfree(bo->kfd_bo);
 }
 
 
@@ -1607,9 +1609,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
        if (mem->dmabuf)
                dma_buf_put(mem->dmabuf);
-       drm_gem_object_put(&mem->bo->tbo.base);
        mutex_destroy(&mem->lock);
-       kfree(mem);
+
+       /* If this releases the last reference, it will end up calling
+        * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+        * this needs to be the last call here.
+        */
+       drm_gem_object_put(&mem->bo->tbo.base);
 
        return ret;
 }
index aeb92e5677acec9817a5e5cd8ca046ca6436378b..4fcfc2313b8ce122063652779cbee442b027942b 100644 (file)
@@ -1274,7 +1274,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
        abo = ttm_to_amdgpu_bo(bo);
 
        if (abo->kfd_bo)
-               amdgpu_amdkfd_unreserve_memory_limit(abo);
+               amdgpu_amdkfd_release_notify(abo);
 
        /* We only remove the fence if the resv has individualized. */
        WARN_ON_ONCE(bo->type == ttm_bo_type_kernel