drm/amd: Add per-ring reset for vcn v4.0.5 use
authorMario Limonciello <mario.limonciello@amd.com>
Tue, 6 May 2025 20:49:46 +0000 (15:49 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 7 May 2025 21:48:11 +0000 (17:48 -0400)
There is a problem occurring on VCN 4.0.5 where in some situations a job
is timing out.  This triggers a job timeout which then causes a GPU
reset for recovery.  That has exposed a number of issues with GPU reset
that have since been fixed. But also a GPU reset isn't actually needed
for this circumstance. Just restarting the ring is enough.

Add a reset callback for the ring which will stop and start VCN if the
issue happens.

Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3909
Link: https://lore.kernel.org/r/20250506204948.12048-2-mario.limonciello@amd.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c

index 558469744f3a3187a9acf3e00d57919608a3e95e..ed00d35039c137b6beab1c65258a7611ca6a42cb 100644 (file)
@@ -219,6 +219,13 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
                        adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
        }
 
+       adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+       adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+       r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+       if (r)
+               return r;
+
        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_virt_alloc_mm_table(adev);
                if (r)
@@ -1440,6 +1447,20 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
        }
 }
 
+static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+       if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+               return -EOPNOTSUPP;
+
+       vcn_v4_0_5_stop(vinst);
+       vcn_v4_0_5_start(vinst);
+
+       return amdgpu_ring_test_helper(ring);
+}
+
 static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
        .type = AMDGPU_RING_TYPE_VCN_ENC,
        .align_mask = 0x3f,
@@ -1467,6 +1488,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
        .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
        .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+       .reset = vcn_v4_0_5_ring_reset,
 };
 
 /**