drm/amdkfd: add reset queue function for RAS poison (v2)
authorTao Zhou <tao.zhou1@amd.com>
Thu, 16 Dec 2021 06:34:10 +0000 (14:34 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 28 Dec 2021 21:02:47 +0000 (16:02 -0500)
The new interface unmaps queues in reset mode for a process that consumes
RAS poison; it applies only to compute queues.

v2: rename the function to reset_queues.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h

index 01a2cc3928ac5a6c22a12d3cf006023a5aa41e2a..19890e3501075001f58f6a91fac2f6be7516caf7 100644 (file)
@@ -1476,6 +1476,21 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
        return retval;
 }
 
+/* only for compute queue: unmaps the queues selected by pasid while holding the dqm lock */
+static int reset_queues_cpsch(struct device_queue_manager *dqm,
+                       uint16_t pasid)
+{
+       int retval;
+
+       dqm_lock(dqm);
+
+       retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
+                       pasid, true); /* true: presumably requests reset mode - TODO confirm */
+
+       dqm_unlock(dqm);
+       return retval; /* result of unmap_queues_cpsch(), passed through unchanged */
+}
+
 /* dqm->lock mutex has to be locked before calling this function */
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
@@ -1896,6 +1911,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
                dqm->ops.evict_process_queues = evict_process_queues_cpsch;
                dqm->ops.restore_process_queues = restore_process_queues_cpsch;
                dqm->ops.get_wave_state = get_wave_state;
+               dqm->ops.reset_queues = reset_queues_cpsch;
                break;
        case KFD_SCHED_POLICY_NO_HWS:
                /* initialize dqm for no cp scheduling */
index 499fc0ea387fabfe14c2e2c21b398e50c0ca663c..e145e4deb53a778cf99f65bd43dabeb4fea411b9 100644 (file)
@@ -81,6 +81,8 @@ struct device_process_node {
  *
  * @get_wave_state: Retrieves context save state and optionally copies the
  * control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @reset_queues: reset queues which consume RAS poison
  */
 
 struct device_queue_manager_ops {
@@ -134,6 +136,9 @@ struct device_queue_manager_ops {
                                  void __user *ctl_stack,
                                  u32 *ctl_stack_used_size,
                                  u32 *save_area_used_size);
+
+       int (*reset_queues)(struct device_queue_manager *dqm,
+                                       uint16_t pasid);
 };
 
 struct device_queue_manager_asic_ops {