drm/amdgpu: optimize insert_nop using multi dwords
author: Sunil Khatri <sunil.khatri@amd.com>
Tue, 8 Oct 2024 13:02:16 +0000 (18:32 +0530)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 15 Oct 2024 15:16:40 +0000 (11:16 -0400)
Optimize the ring_insert_nop function to write n dwords in one
step rather than calling amdgpu_ring_write for each
nop packet. This avoids a function call for each nop
packet, and wptr is updated only once.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

index 03bce2fa866a6527c1ef8788c949332b16a11830..42f616c05f504e59610c11b6c7df5e02d405a2e0 100644 (file)
@@ -108,10 +108,26 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
  */
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       int i;
+       uint32_t occupied, chunk1, chunk2;
+       uint32_t *dst;
 
-       for (i = 0; i < count; i++)
-               amdgpu_ring_write(ring, ring->funcs->nop);
+       occupied = ring->wptr & ring->buf_mask;
+       dst = (void *)&ring->ring[occupied];
+       chunk1 = ring->buf_mask + 1 - occupied;
+       chunk1 = (chunk1 >= count) ? count : chunk1;
+       chunk2 = count - chunk1;
+
+       if (chunk1)
+               memset32(dst, ring->funcs->nop, chunk1);
+
+       if (chunk2) {
+               dst = (void *)ring->ring;
+               memset32(dst, ring->funcs->nop, chunk2);
+       }
+
+       ring->wptr += count;
+       ring->wptr &= ring->ptr_mask;
+       ring->count_dw -= count;
 }
 
 /**