drm/amdgpu: abort KIQ waits when there is a pending reset
author Victor Skvortsov <victor.skvortsov@amd.com>
Fri, 2 Aug 2024 18:22:26 +0000 (14:22 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 16 Aug 2024 18:27:50 +0000 (14:27 -0400)
Stop waiting for the KIQ to return when there is a reset pending.
It's quite likely that the KIQ will never respond.

Signed-off-by: Koenig Christian <Christian.Koenig@amd.com>
Suggested-by: Lazar Lijo <Lijo.Lazar@amd.com>
Tested-by: Victor Skvortsov <victor.skvortsov@amd.com>
Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

index b49b3650fd62173b15c03d38889e63949b5b5bf5..17a19d49d30a5768532700c1269952b97074a394 100644 (file)
@@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
                goto failed_kiq;
 
        might_sleep();
-       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+              !amdgpu_reset_pending(adev->reset_domain)) {
 
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
index 4ae581f3fcb54b60701a226d18951b1cb416e84e..1cb920abc2fe9e1ae6a6909216a766750370dd3b 100644 (file)
@@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
        return queue_work(domain->wq, work);
 }
 
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
+{
+       lockdep_assert_held(&domain->sem);
+       return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);