drm/amdgpu/userq: add helpers to start/stop scheduling
Author: Alex Deucher <alexander.deucher@amd.com>
Thu, 10 Apr 2025 17:26:43 +0000 (13:26 -0400)
Committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 21 Apr 2025 14:55:59 +0000 (10:55 -0400)
This will be used to stop/start user queue scheduling for
example when switching between kernel and user queues when
enforce isolation is enabled.

v2: use idx
v3: only stop compute/gfx queues

Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h

index 3212fd78b012911e97203bb0816129bb015b6db8..68410ba1b9446dec83293c0a8fe7b60ca715abc8 100644 (file)
@@ -1249,6 +1249,7 @@ struct amdgpu_device {
 
        struct list_head                userq_mgr_list;
        struct mutex                    userq_mutex;
+       bool                            userq_halt_for_enforce_isolation;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
index 1867520ba258ed1f912c4f4037fee7978963ef97..e944d05685dde4f6c737cabda1e1e785a6bac66c 100644 (file)
@@ -275,6 +275,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
        const struct amdgpu_userq_funcs *uq_funcs;
        struct amdgpu_usermode_queue *queue;
        struct amdgpu_db_info db_info;
+       bool skip_map_queue;
        uint64_t index;
        int qid, r = 0;
 
@@ -348,6 +349,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
                goto unlock;
        }
 
+
        qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
        if (qid < 0) {
                DRM_ERROR("Failed to allocate a queue id\n");
@@ -358,15 +360,28 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
                goto unlock;
        }
 
-       r = uq_funcs->map(uq_mgr, queue);
-       if (r) {
-               DRM_ERROR("Failed to map Queue\n");
-               idr_remove(&uq_mgr->userq_idr, qid);
-               amdgpu_userq_fence_driver_free(queue);
-               uq_funcs->mqd_destroy(uq_mgr, queue);
-               kfree(queue);
-               goto unlock;
+       /* don't map the queue if scheduling is halted */
+       mutex_lock(&adev->userq_mutex);
+       if (adev->userq_halt_for_enforce_isolation &&
+           ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+            (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
+               skip_map_queue = true;
+       else
+               skip_map_queue = false;
+       if (!skip_map_queue) {
+               r = uq_funcs->map(uq_mgr, queue);
+               if (r) {
+                       mutex_unlock(&adev->userq_mutex);
+                       DRM_ERROR("Failed to map Queue\n");
+                       idr_remove(&uq_mgr->userq_idr, qid);
+                       amdgpu_userq_fence_driver_free(queue);
+                       uq_funcs->mqd_destroy(uq_mgr, queue);
+                       kfree(queue);
+                       goto unlock;
+               }
        }
+       mutex_unlock(&adev->userq_mutex);
+
 
        args->out.queue_id = qid;
 
@@ -733,3 +748,58 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
        mutex_unlock(&adev->userq_mutex);
        return ret;
 }
+
+/*
+ * amdgpu_userq_stop_sched_for_enforce_isolation - halt user queue scheduling
+ * @adev: amdgpu device
+ * @idx: partition (xcp) index whose queues should be stopped
+ *
+ * Sets userq_halt_for_enforce_isolation (so amdgpu_userqueue_create skips
+ * mapping newly created GFX/compute queues) and unmaps every currently
+ * mapped GFX and compute user queue on partition @idx.  Other queue types
+ * (e.g. SDMA) are intentionally left running (v3 of the patch).
+ *
+ * Returns: 0 on success; otherwise the bitwise OR of the unmap callbacks'
+ * return values (individual error codes are merged, not preserved).
+ */
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+                                                 u32 idx)
+{
+       const struct amdgpu_userq_funcs *userq_funcs;
+       struct amdgpu_usermode_queue *queue;
+       struct amdgpu_userq_mgr *uqm, *tmp;
+       int queue_id;
+       int ret = 0;
+
+       mutex_lock(&adev->userq_mutex);
+       /* a second stop without an intervening start indicates a driver bug */
+       if (adev->userq_halt_for_enforce_isolation)
+               dev_warn(adev->dev, "userq scheduling already stopped!\n");
+       adev->userq_halt_for_enforce_isolation = true;
+       list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+               /* NOTE(review): cancel_delayed_work_sync() is called while
+                * holding userq_mutex; this deadlocks if resume_work ever
+                * takes the same mutex -- confirm the work item's locking.
+                */
+               cancel_delayed_work_sync(&uqm->resume_work);
+               idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+                       /* only GFX/compute queues on the requested partition */
+                       if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+                            (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+                           (queue->xcp_id == idx)) {
+                               userq_funcs = adev->userq_funcs[queue->queue_type];
+                               ret |= userq_funcs->unmap(uqm, queue);
+                       }
+               }
+       }
+       mutex_unlock(&adev->userq_mutex);
+       return ret;
+}
+
+/*
+ * amdgpu_userq_start_sched_for_enforce_isolation - resume user queue scheduling
+ * @adev: amdgpu device
+ * @idx: partition (xcp) index whose queues should be restarted
+ *
+ * Clears userq_halt_for_enforce_isolation (so amdgpu_userqueue_create maps
+ * new queues again) and re-maps every GFX and compute user queue on
+ * partition @idx that was unmapped by
+ * amdgpu_userq_stop_sched_for_enforce_isolation().
+ *
+ * Returns: 0 on success; otherwise the bitwise OR of the map callbacks'
+ * return values (individual error codes are merged, not preserved).
+ */
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+                                                  u32 idx)
+{
+       const struct amdgpu_userq_funcs *userq_funcs;
+       struct amdgpu_usermode_queue *queue;
+       struct amdgpu_userq_mgr *uqm, *tmp;
+       int queue_id;
+       int ret = 0;
+
+       mutex_lock(&adev->userq_mutex);
+       /* a start without a preceding stop indicates a driver bug */
+       if (!adev->userq_halt_for_enforce_isolation)
+               dev_warn(adev->dev, "userq scheduling already started!\n");
+       adev->userq_halt_for_enforce_isolation = false;
+       list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+               idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+                       /* only GFX/compute queues on the requested partition */
+                       if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+                            (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+                           (queue->xcp_id == idx)) {
+                               userq_funcs = adev->userq_funcs[queue->queue_type];
+                               ret |= userq_funcs->map(uqm, queue);
+                       }
+               }
+       }
+       mutex_unlock(&adev->userq_mutex);
+       return ret;
+}
index db79141e1c1e0437d61550caaef1d192ba679817..0701f33e6740f93d5c903c5c9e7cf14130347b8c 100644 (file)
@@ -115,4 +115,9 @@ uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
 int amdgpu_userq_suspend(struct amdgpu_device *adev);
 int amdgpu_userq_resume(struct amdgpu_device *adev);
 
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+                                                 u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+                                                  u32 idx);
+
 #endif