drm/amdgpu: Increase tlb flush timeout for sriov
authorDusica Milinkovic <Dusica.Milinkovic@amd.com>
Wed, 10 Aug 2022 07:43:15 +0000 (09:43 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 16 Aug 2022 22:08:01 +0000 (18:08 -0400)
[Why]
During multi-vf executing benchmark (Luxmark) observed kiq error timeout.
It happenes because all of VFs do the tlb invalidation at the same time.
Although each VF has the invalidate register set, from hardware side
the invalidate requests are queue to execute.

[How]
In case of 12 VF increase timeout on 12*100ms

Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
Acked-by: Shaoyun Liu <shaoyun.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index e146810c700ba7846c926ef205f6480418b3f415..d597e2656c475da6642e88156c8a9d6e056986da 100644 (file)
@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq {
        AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
        AMDGPU_CP_KIQ_IRQ_LAST
 };
-
+#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
index 9ae8cdaa033ee391cdbfea5761879ac87af71862..f513e2c2e964f0c9b3c8d8d522e96692eb55259b 100644 (file)
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
index ab89d91975ab070478487b0545eda02aa2e9ff2b..4603653916f5a551854a784c75da1817731f4bf9 100644 (file)
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        up_read(&adev->reset_domain->sem);