drm/amdkfd: Add rec SDMA engines support with limited XGMI
author Shane Xiao <shane.xiao@amd.com>
Thu, 10 Apr 2025 04:35:15 +0000 (12:35 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Sun, 13 Apr 2025 13:56:32 +0000 (09:56 -0400)
This patch adds recommended SDMA engine support for devices that have a
limited number of XGMI SDMA engines. This optimization helps improve
overall performance for device-to-device copies.

v2: Fix formatting issues and update data types

Signed-off-by: Shane Xiao <shane.xiao@amd.com>
Suggested-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_topology.c

index 9bbee484d57cc447623230135aab9af9d779ec42..baa2374acdeb53ada6e52738662df4e86ac1fc80 100644 (file)
@@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
 {
        struct kfd_node *gpu = outbound_link->gpu;
        struct amdgpu_device *adev = gpu->adev;
-       int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+       unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+       unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+       unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+       uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+       uint32_t xgmi_sdma_eng_id_mask =
+                       ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
        bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
                adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
-               kfd_get_num_xgmi_sdma_engines(gpu) >= 14 &&
-               (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);
+               num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+               num_xgmi_nodes == 8);
 
        if (support_rec_eng) {
                int src_socket_id = adev->gmc.xgmi.physical_node_id;
                int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+               unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
 
                outbound_link->rec_sdma_eng_id_mask =
-                       1 << rec_sdma_eng_map[src_socket_id][dst_socket_id];
+                       1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
                inbound_link->rec_sdma_eng_id_mask =
-                       1 << rec_sdma_eng_map[dst_socket_id][src_socket_id];
-       } else {
-               int num_sdma_eng = kfd_get_num_sdma_engines(gpu);
-               int i, eng_offset = 0;
+                       1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
 
-               if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
-                   kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) {
-                       eng_offset = num_sdma_eng;
-                       num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu);
-               }
+               /* If recommended engine is out of range, need to reset the mask */
+               if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+                       outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+               if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+                       inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
 
-               for (i = 0; i < num_sdma_eng; i++) {
-                       outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
-                       inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
-               }
+       } else {
+               uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+                               num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+                               sdma_eng_id_mask;
+
+               outbound_link->rec_sdma_eng_id_mask = engine_mask;
+               inbound_link->rec_sdma_eng_id_mask = engine_mask;
        }
 }