drm/amdgpu: Enable tunneling on high-priority compute queues
author: Friedrich Vock <friedrich.vock@gmx.de>
Sat, 2 Dec 2023 00:17:40 +0000 (01:17 +0100)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 13 Dec 2023 20:23:59 +0000 (15:23 -0500)
This improves latency if the GPU is already busy with other work.
This is useful for VR compositors that submit highly latency-sensitive
compositing work on high-priority compute queues while the GPU is busy
rendering the next frame.

Userspace merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462

v2: bump driver version (Alex)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

index 82d29406927668b18d2ec4623d113eaa7480c302..616b6c91176796b6f72be410c5e8541e8416492e 100644 (file)
@@ -791,6 +791,7 @@ struct amdgpu_mqd_prop {
        uint64_t eop_gpu_addr;
        uint32_t hqd_pipe_priority;
        uint32_t hqd_queue_priority;
+       bool allow_tunneling;
        bool hqd_active;
 };
 
index a194db09cde6ac95625c1d0d84c514d027651eb6..880137774b4eb2649559e03cceba5ca53fd70c8d 100644 (file)
 * - 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
  * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
  * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
  */
 #define KMS_DRIVER_MAJOR       3
-#define KMS_DRIVER_MINOR       56
+#define KMS_DRIVER_MINOR       57
 #define KMS_DRIVER_PATCHLEVEL  0
 
 /*
index 6a80d3ec887e98972d84e8c1f75b6fdba5a86143..45424ebf9681430fefc21bdc33d6aa2c6e5f6c91 100644 (file)
@@ -642,6 +642,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
                                    struct amdgpu_mqd_prop *prop)
 {
        struct amdgpu_device *adev = ring->adev;
+       bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+                                   amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+       bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+                               amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
 
        memset(prop, 0, sizeof(*prop));
 
@@ -659,10 +663,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
         */
        prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
 
-       if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
-            amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
-           (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
-            amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+       prop->allow_tunneling = is_high_prio_compute;
+       if (is_high_prio_compute || is_high_prio_gfx) {
                prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
                prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
        }
index c8a3bf01743f6381ec218077bcb42790f5a7b741..73f6d7e72c737537f17264746b061a936b4960e5 100644 (file)
@@ -6593,7 +6593,8 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
 #endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+                           prop->allow_tunneling);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;
index c659ef0f47ce5421f2102556590dc966aa397b39..bdcf96df69e6b28ade574a1107f76c6267e0bc9b 100644 (file)
@@ -3847,7 +3847,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                            (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+       tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+                           prop->allow_tunneling);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;