drm/amdkfd: add gc 9.5.0 support on kfd
author: Alex Sierra <alex.sierra@amd.com>
Wed, 21 Feb 2024 21:02:15 +0000 (15:02 -0600)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 10 Dec 2024 15:26:51 +0000 (10:26 -0500)
Initial support for GC 9.5.0.

v2: squash in pqm_clean_queue_resource() fix from Lijo

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.h
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c

index e5324c5bc6c71454178b6a9a97a57cd5c12ebd4e..693469c18c609b196034a82d324b510c54da9e14 100644 (file)
@@ -1639,6 +1639,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
                        break;
                case IP_VERSION(9, 4, 3):
                case IP_VERSION(9, 4, 4):
+               case IP_VERSION(9, 5, 0):
                        num_of_cache_types =
                                kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
                                                                        *pcache_info);
index 924d0fd85dfb88bb03e96cbd6af03bb75c03d2da..27aa1a5b120ff7f3261190a395b528ce903c1219 100644 (file)
@@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
        return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
                KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
                KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+               KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
                KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
 }
 
index 9b51dd75fefc7de2502937e9f9e0352a77cf190f..b58f798bf2d56996db71e9db3d0767a18aa4ae23 100644 (file)
@@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
        case IP_VERSION(4, 4, 0):/* ALDEBARAN */
        case IP_VERSION(4, 4, 2):
        case IP_VERSION(4, 4, 5):
+       case IP_VERSION(4, 4, 4):
        case IP_VERSION(5, 0, 0):/* NAVI10 */
        case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
        case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -152,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
                break;
        case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
        case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+       case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
                kfd->device_info.event_interrupt_class =
                                                &event_interrupt_class_v9_4_3;
                break;
@@ -356,6 +358,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        gfx_target_version = 90402;
                        f2g = &gc_9_4_3_kfd2kgd;
                        break;
+               case IP_VERSION(9, 5, 0):
+                       gfx_target_version = 90500;
+                       f2g = &gc_9_4_3_kfd2kgd;
+                       break;
                /* Navi10 */
                case IP_VERSION(10, 1, 10):
                        gfx_target_version = 100100;
@@ -515,6 +521,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
                                             > KFD_CWSR_TMA_OFFSET);
                        kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+               } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+                       BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+                       kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
+                       kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
                } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
                        BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
                                             > KFD_CWSR_TMA_OFFSET);
@@ -567,6 +577,7 @@ static int kfd_gws_init(struct kfd_node *node)
                        && kfd->mec2_fw_version >= 0x28) ||
                (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
                 KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+               (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
                (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
                        && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
                        && kfd->mec2_fw_version >= 0x6b) ||
@@ -733,14 +744,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
        last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
        vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
 
-       /* For GFX9.4.3, we need special handling for VMIDs depending on
-        * partition mode.
+       /* For multi-partition capable GPUs, we need special handling for VMIDs
+        * depending on partition mode.
         * In CPX mode, the VMID range needs to be shared between XCDs.
         * Additionally, there are 13 VMIDs (3-15) available for KFD. To
         * divide them equally, we change starting VMID to 4 and not use
         * VMID 3.
-        * If the VMID range changes for GFX9.4.3, then this code MUST be
-        * revisited.
+        * If the VMID range changes for multi-partition capable GPUs, then
+        * this code MUST be revisited.
         */
        if (kfd->adev->xcp_mgr) {
                partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -805,14 +816,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
 
        /*
-        * For GFX9.4.3, the KFD abstracts all partitions within a socket as
-        * xGMI connected in the topology so assign a unique hive id per
-        * device based on the pci device location if device is in PCIe mode.
+        * For multi-partition capable GPUs, the KFD abstracts all partitions
+        * within a socket as xGMI connected in the topology so assign a unique
+        * hive id per device based on the pci device location if device is in
+        * PCIe mode.
         */
-       if (!kfd->hive_id &&
-           (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
-            KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
-           kfd->num_nodes > 1)
+       if (!kfd->hive_id && kfd->num_nodes > 1)
                kfd->hive_id = pci_dev_id(kfd->adev->pdev);
 
        kfd->noretry = kfd->adev->gmc.noretry;
@@ -850,12 +859,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                                KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
                }
 
-               if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
-                    KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
-                   partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+               if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
                    kfd->num_nodes != 1) {
-                       /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
-                        * 4-9 and second XCD gets VMID range 10-15.
+                       /* For multi-partition capable GPUs and CPX mode, first
+                        * XCD gets VMID range 4-9 and second XCD gets VMID
+                        * range 10-15.
                         */
 
                        node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -879,8 +887,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                amdgpu_amdkfd_get_local_mem_info(kfd->adev,
                                        &node->local_mem_info, node->xcp);
 
-               if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4))
+               if (kfd->adev->xcp_mgr)
                        kfd_setup_interrupt_bitmap(node, i);
 
                /* Initialize the KFD node */
index 210bcc048f4c511b9cfbd57034964055546135fb..67137e674f1d08dc21ae9cdd10c83c0592e29e88 100644 (file)
@@ -64,7 +64,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
                        qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
 
                if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+                   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
+                   KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
                        qpd->sh_mem_config |=
                                (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
 
index 84e8ea3a8a0c940561c9f97eb62922d8d3311ecf..ff417d5361c42e7c080b9eb1892a941d5157bae9 100644 (file)
@@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
        m->compute_static_thread_mgmt_se2 = se_mask[2];
        m->compute_static_thread_mgmt_se3 = se_mask[3];
        if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
-           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) {
+           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
                m->compute_static_thread_mgmt_se4 = se_mask[4];
                m->compute_static_thread_mgmt_se5 = se_mask[5];
                m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -301,7 +302,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
                m->cp_hqd_ctx_save_control = 0;
 
        if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
-           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4))
+           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+           KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
                update_cu_mask(mm, mqd, minfo, 0);
        set_priority(m, q);
 
@@ -885,7 +887,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
                if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+                   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+                   KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
                        mqd->init_mqd = init_mqd_v9_4_3;
                        mqd->load_mqd = load_mqd_v9_4_3;
                        mqd->update_mqd = update_mqd_v9_4_3;
@@ -909,8 +912,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
+               mqd->check_preemption_failed = check_preemption_failed;
                if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+                   KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+                   KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
                        mqd->init_mqd = init_mqd_hiq_v9_4_3;
                        mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
                        mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
index cac706dd66ed87294cbc8929217d965dd5cd43a6..4984b41cd372131cb52543c35d3e12e4dd35fec6 100644 (file)
@@ -260,7 +260,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
        default:
                if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
                    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4))
+                   KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+                   KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
                        pm->pmf = &kfd_aldebaran_pm_funcs;
                else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
                        pm->pmf = &kfd_v9_pm_funcs;
index 9e5ca0b93b2a256f511ca07820f89724bf888bf0..800e4ae5b5735663ad0b99b8819080cf2290fefd 100644 (file)
@@ -207,7 +207,8 @@ enum cache_policy {
 #define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
        ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) ||        \
         (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) ||        \
-        (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)))
+        (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) ||        \
+        (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
 
 struct kfd_node;
 
@@ -1150,7 +1151,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
        uint32_t i;
 
        if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
-           KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4))
+           KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+           KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
                return dev->nodes[0];
 
        for (i = 0; i < dev->num_nodes; i++)
index d0ee173acf824603172ba0091e13970744498efe..0976b5b0e8e8c28e74d2e0e3cad8352ec5b9a315 100644 (file)
@@ -2127,10 +2127,11 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
        irq_drain_fence[3] = pdd->process->pasid;
 
        /*
-        * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+        * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
         */
        if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
-           KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) {
+           KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+           KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
                node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
                irq_drain_fence[3] |= node_id << 16;
        }
index 59b92d66e958905448a4253908635e817e4b8039..9df56f8e09f910680d7f6f37710fe65abb0bdf30 100644 (file)
@@ -131,8 +131,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
        if (!gws && pdd->qpd.num_gws == 0)
                return -EINVAL;
 
-       if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
-           KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+       if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+            KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+            KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
            !dev->kfd->shared_resources.enable_mes) {
                if (gws)
                        ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
@@ -197,6 +198,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
        if (pqn->q->gws) {
                if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
                    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+                   KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
                    !dev->kfd->shared_resources.enable_mes)
                        amdgpu_amdkfd_remove_gws_from_process(
                                pqm->process->kgd_process_info, pqn->q->gws);
@@ -320,11 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
        unsigned int max_queues = 127; /* HWS limit */
 
        /*
-        * On GFX 9.4.3, increase the number of queues that
-        * can be created to 255. No HWS limit on GFX 9.4.3.
+        * On GFX 9.4.3/9.5.0, increase the number of queues that
+        * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
         */
        if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
-           KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
+           KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+           KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
                max_queues = 255;
 
        q = NULL;
index 9476e30d6baa1b5acd8b98050a506d1cf8b54f6b..52c6c5e9d264e3db0c2c3c04460f04b4e6437324 100644 (file)
@@ -1714,7 +1714,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
                pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
 
                if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
-                   KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4))
+                   KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+                   KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
                        mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
                else
                        mode = UNKNOWN_MEMORY_PARTITION_MODE;