drm/amdkfd: enable cooperative groups for gfx11
author Jonathan Kim <jonathan.kim@amd.com>
Wed, 12 Jul 2023 20:58:55 +0000 (16:58 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 25 Jul 2023 17:35:43 +0000 (13:35 -0400)
MES can concurrently schedule queues on the device that require
exclusive device access, without requiring GWS, when those queues are
marked exclusively_scheduled.  Similar to the F32 HWS, MES will manage
quality of service for these queues.
Use this for cooperative groups, since cooperative groups are limited by
device occupancy.

Since some GFX11 devices can only be debugged with partial CUs, do not
allow the debugging of cooperative groups on these devices as the CU
occupancy limit will change on attach.

In addition, zero-initialize the MES add queue submission vector for MES
initialization tests, as we do not want these to be cooperative
dispatches.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
drivers/gpu/drm/amd/include/mes_v11_api_def.h

index f808841310fdf0953b857c45fedbd8a9fe0694ba..72ab6a838bb68b925864a797df4dd7266443b34f 100644 (file)
@@ -642,6 +642,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
        unsigned long flags;
        int r;
 
+       memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
+
        /* allocate the mes queue buffer */
        queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
        if (!queue) {
index 2d6ac30b7135b894674224b826356c30ec3c8f3d..2053954a235ce974c4ca46d54940090da4227cb6 100644 (file)
@@ -224,6 +224,7 @@ struct mes_add_queue_input {
        uint32_t        is_kfd_process;
        uint32_t        is_aql_queue;
        uint32_t        queue_size;
+       uint32_t        exclusively_scheduled;
 };
 
 struct mes_remove_queue_input {
index 1bdaa00c0b466ff2aabfcf77ae41f781ad4a69ef..8e67e965f7ea900fd6cbf0082fea6b5d058800c9 100644 (file)
@@ -214,6 +214,8 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
        mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
        mes_add_queue_pkt.gds_size = input->queue_size;
 
+       mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled;
+
        return mes_v11_0_submit_pkt_and_poll_completion(mes,
                        &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
                        offsetof(union MESAPI__ADD_QUEUE, api_status));
index 76b41b5de28145eb52dc0e297fea9de59cbde817..9307f303c7fd03b9b5616b7c4d2bea6aa3a627eb 100644 (file)
@@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
                goto out_unlock;
        }
 
-       if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+       if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+                                     kfd_dbg_has_cwsr_workaround(dev))) {
                retval = -EBUSY;
                goto out_unlock;
        }
index ccfc81f085ce123ba766f5954e5eecceab1d7ea0..1f82caea59ba9f39b8901ce1134565bd5869e38d 100644 (file)
@@ -753,7 +753,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
                if (!KFD_IS_SOC15(pdd->dev))
                        return -ENODEV;
 
-               if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+               if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
+                                        kfd_dbg_has_cwsr_workaround(pdd->dev)))
                        return -EBUSY;
        }
 
index 0b3dc754e06ba264bf25835f33c7607a6fae4dfe..ebc9674d3ce133921898e0b871abccab617f19f5 100644 (file)
@@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node)
 {
        int ret = 0;
        struct kfd_dev *kfd = node->kfd;
+       uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
 
        if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
                return 0;
@@ -524,7 +525,10 @@ static int kfd_gws_init(struct kfd_node *node)
                (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
                (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
                        && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
-                       && kfd->mec2_fw_version >= 0x6b))))
+                       && kfd->mec2_fw_version >= 0x6b) ||
+               (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
+                       && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
+                       && mes_rev >= 68))))
                ret = amdgpu_amdkfd_alloc_gws(node->adev,
                                node->adev->gds.gws_size, &node->gws);
 
index 761963ad615474f1414b726210ef4f4935f6efb2..71b7f16c01734f5472fb42dcef91285faddb048c 100644 (file)
@@ -237,10 +237,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        }
        queue_input.queue_type = (uint32_t)queue_type;
 
-       if (q->gws) {
-               queue_input.gws_base = 0;
-               queue_input.gws_size = qpd->num_gws;
-       }
+       queue_input.exclusively_scheduled = q->properties.is_gws;
 
        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
@@ -250,7 +247,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
                        q->properties.doorbell_off);
                pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
                kfd_hws_hang(dqm);
-}
+       }
 
        return r;
 }
index ba9d690541193a2579c4a2aefd9e76ddc0073f11..60e6b37b43badaab41ad0f63ac9263a5798c6071 100644 (file)
@@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
        if (!gws && pdd->qpd.num_gws == 0)
                return -EINVAL;
 
-       if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) {
+       if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
                if (gws)
                        ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
                                gws, &mem);
@@ -136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
        } else {
                /*
                 * Intentionally set GWS to a non-NULL value
-                * for GFX 9.4.3.
+                * for devices that do not use GWS for global wave
+                * synchronization but require the formality
+                * of setting GWS for cooperative groups.
                 */
                pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
        }
@@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
 
        list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
                if (pqn->q && pqn->q->gws &&
-                   KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+                   KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+                   !pqn->q->device->kfd->shared_resources.enable_mes)
                        amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
                                pqn->q->gws);
                kfd_procfs_del_queue(pqn->q);
@@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
                }
 
                if (pqn->q->gws) {
-                       if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3))
+                       if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+                           !dev->kfd->shared_resources.enable_mes)
                                amdgpu_amdkfd_remove_gws_from_process(
                                                pqm->process->kgd_process_info,
                                                pqn->q->gws);
index 0997e999416a48ec905379083fb4b2b0ba7b7f76..b1db2b19018742eb4a41bc23084c3a882323c05a 100644 (file)
@@ -275,7 +275,9 @@ union MESAPI__ADD_QUEUE {
                        uint32_t trap_en                : 1;
                        uint32_t is_aql_queue           : 1;
                        uint32_t skip_process_ctx_clear : 1;
-                       uint32_t reserved               : 19;
+                       uint32_t map_legacy_kq          : 1;
+                       uint32_t exclusively_scheduled  : 1;
+                       uint32_t reserved               : 17;
                };
                struct MES_API_STATUS           api_status;
                uint64_t                        tma_addr;