From: Alex Deucher Date: Fri, 19 Apr 2024 03:30:46 +0000 (-0400) Subject: drm/amdgpu/mes11: Use a separate fence per transaction X-Git-Tag: io_uring-6.10-20240523~68^2~7^2~51 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=eef016ba89862ba8317916269f9f369f317cd264;p=linux-2.6-block.git drm/amdgpu/mes11: Use a separate fence per transaction We can't use a shared fence location because each transaction should be considered independently. Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 310c8f0c21da..21d1d508ee10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -32,6 +32,18 @@ #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 #define AMDGPU_ONE_DOORBELL_SIZE 8 +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout) +{ + + while ((s64)(wait_seq - *fence) > 0 && timeout > 0) { + udelay(2); + timeout -= 2; + } + return timeout > 0 ? timeout : 0; +} + int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) { return roundup(AMDGPU_ONE_DOORBELL_SIZE * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 6b3e1844eac5..b99a2b3cffe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -340,6 +340,10 @@ struct amdgpu_mes_funcs { #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout); + int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5c08ad234439..0d1407f25005 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -163,6 +163,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, unsigned long flags; signed long timeout = 3000000; /* 3000 ms */ const char *op_str, *misc_op_str; + u32 fence_offset; + u64 fence_gpu_addr; + u64 *fence_ptr; + int ret; if (x_pkt->header.opcode >= MES_SCH_API_MAX) return -EINVAL; @@ -175,15 +179,24 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, } BUG_ON(size % 4 != 0); + ret = amdgpu_device_wb_get(adev, &fence_offset); + if (ret) + return ret; + fence_gpu_addr = + adev->wb.gpu_addr + (fence_offset * 4); + fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; + *fence_ptr = 0; + spin_lock_irqsave(&mes->ring_lock, flags); if (amdgpu_ring_alloc(ring, ndw)) { spin_unlock_irqrestore(&mes->ring_lock, flags); + amdgpu_device_wb_free(adev, fence_offset); return -ENOMEM; } api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; + api_status->api_completion_fence_addr = fence_gpu_addr; + api_status->api_completion_fence_value = 1; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); @@ -199,8 +212,8 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, else dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - timeout); + r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); + amdgpu_device_wb_free(adev, fence_offset); if (r < 1) { if (misc_op_str)