From: Alex Deucher Date: Thu, 29 May 2025 16:58:53 +0000 (-0400) Subject: drm/amdgpu: move force completion into ring resets X-Git-Tag: io_uring-6.17-20250815~29^2~18^2~23 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=2dee58ca471dae05c473270d0fb74efe01a78ccb;p=linux-block.git drm/amdgpu: move force completion into ring resets Move the force completion handling into each ring reset function so that each engine can determine whether or not it needs to force completion on the jobs in the ring. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 93413be59e08..177f04491a11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -161,10 +161,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) r = amdgpu_ring_reset(ring, job->vmid, NULL); if (!r) { - if (is_guilty) { + if (is_guilty) atomic_inc(&ring->adev->gpu_reset_counter); - amdgpu_fence_driver_force_completion(ring); - } drm_sched_wqueue_start(&ring->sched); dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 4b2af95203b2..4d0ee3ffe985 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9577,7 +9577,11 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, @@ -9650,7 +9654,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8d5c0ab016d2..39f4dd18c277 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6842,7 +6842,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -7004,7 +7008,11 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 795bd353a9ce..964fa3f2e271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5337,7 +5337,11 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -5452,7 +5456,11 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, return r; } - return amdgpu_ring_test_ring(ring); + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d91e0423c482..95e319974f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7242,7 +7242,12 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, DRM_ERROR("fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 272f0f8e41d4..8bfee17a826e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3620,7 +3620,12 @@ pipe_reset: dev_err(adev->dev, "fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } enum amdgpu_gfx_cp_ras_mem_id { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 93eb71d2ce30..6621a7b1f29f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -768,9 +768,15 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v2_0_stop(ring->adev); jpeg_v2_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 45b8702d20ad..44a5c0e82ca4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -647,9 +647,15 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 9bd0ae4a24a1..e813af4eedd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -559,9 +559,15 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + jpeg_v3_0_stop(ring->adev); jpeg_v3_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 4fc1ef85b294..190f0742d701 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -724,12 +724,18 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EINVAL; jpeg_v4_0_stop(ring->adev); jpeg_v4_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index bdc7612ba56a..04755b7a62d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1147,12 +1147,18 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; jpeg_v4_0_3_core_stall_reset(ring); jpeg_v4_0_3_start_jrbc(ring); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index aee4f50a0f52..e7f942dc714a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -838,12 +838,18 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + int r; + if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index b8828432c98a..c13ae87d4e64 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1679,6 +1679,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + bool is_guilty = ring->funcs->is_guilty(ring); struct amdgpu_device *adev = ring->adev; u32 id = ring->me; int r; @@ -1689,8 +1690,13 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, amdgpu_amdkfd_suspend(adev, true); r = amdgpu_sdma_reset_engine(adev, id); amdgpu_amdkfd_resume(adev, true); + if (r) + return r; - return r; + if (is_guilty) + amdgpu_fence_driver_force_completion(ring); + + return 0; } static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) @@ -1734,8 +1740,8 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 inst_mask; - int i; + u32 inst_mask, tmp_mask; + int i, r; inst_mask = 1 << ring->me; udelay(50); @@ -1752,7 +1758,24 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) return -ETIMEDOUT; } - return sdma_v4_4_2_inst_start(adev, inst_mask, true); + r = sdma_v4_4_2_inst_start(adev, inst_mask, true); + if (r) + return r; + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + ring = &adev->sdma.instance[i].ring; + + amdgpu_fence_driver_force_completion(ring); + + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page = &adev->sdma.instance[i].page; + + amdgpu_fence_driver_force_completion(page); + } + } + + return r; } static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 2fd72c85cf01..4d72b085b3dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1618,7 +1618,10 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 21421f1bd209..42a25150f83a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1534,7 +1534,10 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 2455965387e0..c6cb7ff15caa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1574,7 +1574,11 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - return sdma_v6_0_gfx_resume_instance(adev, i, true); + r = sdma_v6_0_gfx_resume_instance(adev, i, true); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 6210be7e4a52..b00c63812899 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -826,7 +826,11 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - return sdma_v7_0_gfx_resume_instance(adev, i, true); + r = sdma_v7_0_gfx_resume_instance(adev, i, true); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 244359fa4aac..6c25e9fc4f0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1973,6 +1973,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1980,7 +1981,11 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_stop(vinst); vcn_v4_0_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 75c07ebf7fe4..1e1dd61b774e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1623,8 +1623,10 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_3_hw_init_inst(vinst); vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); r = amdgpu_ring_test_helper(ring); - - return r; + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 731f7762c3e0..9c02446bb1a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1471,6 +1471,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1478,7 +1479,11 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, vcn_v4_0_5_stop(vinst); vcn_v4_0_5_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index f975994b3ff4..c8924f97cf58 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1198,6 +1198,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; @@ -1205,7 +1206,11 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, vcn_v5_0_0_stop(vinst); vcn_v5_0_0_start(vinst); - return amdgpu_ring_test_helper(ring); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + amdgpu_fence_driver_force_completion(ring); + return 0; } static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {