drm/amdgpu/gfx11: wait for reset done before remap
authorJiadong Zhu <Jiadong.Zhu@amd.com>
Tue, 2 Jul 2024 02:01:21 +0000 (10:01 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 2 Sep 2024 15:40:34 +0000 (11:40 -0400)
There is a race condition in which the CP firmware modifies the
MQD during the reset sequence after the driver has updated it for
remapping. We have to wait until CP_HQD_ACTIVE becomes false
before remapping the queue.

Acked-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index 561edfcc840474ee1f3272cd56d37447b1b2443c..2f5eed56892d1e366c6121abb4dc8d53f93c462f 100644 (file)
@@ -6582,16 +6582,29 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
 {
        struct amdgpu_device *adev = ring->adev;
-       int r;
+       int i, r = 0;
 
        gfx_v11_0_set_safe_mode(adev, 0);
        mutex_lock(&adev->srbm_mutex);
        soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
        WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+       /* make sure dequeue is complete*/
+       for (i = 0; i < adev->usec_timeout; i++) {
+               if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+                       break;
+               udelay(1);
+       }
+       if (i >= adev->usec_timeout)
+               r = -ETIMEDOUT;
        soc21_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);
        gfx_v11_0_unset_safe_mode(adev, 0);
+       if (r) {
+               dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+               return r;
+       }
 
        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0)) {