drm/amdgpu: Make enforce_isolation setting per GPU
authorSrinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Mon, 29 Jul 2024 16:05:26 +0000 (21:35 +0530)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 16 Aug 2024 18:27:45 +0000 (14:27 -0400)
This commit makes the enforce_isolation setting per-GPU and per-partition
by adding an enforce_isolation array to the adev structure. Each entry of
the array is initialized from the global enforce_isolation module
parameter during device initialization.

In amdgpu_cs.c, the adev->enforce_isolation value for the current GPU
is used to determine whether to enforce isolation between graphics and
compute processes on that GPU.

In amdgpu_ids.c, the adev->enforce_isolation value for the current GPU
and partition is used to determine whether to enforce isolation between
graphics and compute processes on that GPU and partition.

This allows the enforce_isolation setting to be controlled individually
for each GPU and each partition, which is useful in a system with
multiple GPUs and partitions where different isolation settings might be
desired for different GPUs and partitions.

v2: fix loop in amdgpu_vmid_mgr_init() (Alex)

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h

index 937de21a71422feba90baebd97917256113cf8d3..0dceeea235cf781ac0b10aeee7716dc087007c41 100644 (file)
@@ -1162,6 +1162,8 @@ struct amdgpu_device {
        bool                            debug_disable_soft_recovery;
        bool                            debug_use_vram_fw_buf;
        bool                            debug_enable_ras_aca;
+
+       bool                            enforce_isolation[MAX_XCP];
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
index 78b3c067fea7e276fcca711510861abfd90fcaf6..5d5ba1e3d90f56bb0ca37314b526692df5209053 100644 (file)
@@ -1110,7 +1110,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                        struct drm_gpu_scheduler *sched = entity->rq->sched;
                        struct amdgpu_ring *ring = to_amdgpu_ring(sched);
 
-                       if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
+                       if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
                                return -EINVAL;
                }
        }
index 482db4ebcc4ba0702ea3a273689e620ed182ac71..e623af740aa353a3502c336a9d4d8cc33fe87697 100644 (file)
@@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
  */
 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
+       int i;
+
        if (amdgpu_sched_jobs < 4) {
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
@@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
+       for (i = 0; i < MAX_XCP; i++)
+               adev->enforce_isolation[i] = !!enforce_isolation;
+
        return 0;
 }
 
index 6608eeb61e5a06c0d7ae1254c144671867547033..92d27d32de41ba138757c8c7f0b3f5c168a96fd6 100644 (file)
@@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
        if (r || !idle)
                goto error;
 
-       if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
+       if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) {
                r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
                if (r || !id)
                        goto error;
@@ -476,15 +476,19 @@ error:
 
 /*
  * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @adev: amdgpu_device pointer
  * @vm: the VM to check
  * @vmhub: the VMHUB which will be used
  *
  * Returns: True if the VM will use a reserved VMID.
  */
-bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
+                              struct amdgpu_vm *vm, unsigned int vmhub)
 {
        return vm->reserved_vmid[vmhub] ||
-               (enforce_isolation && AMDGPU_IS_GFXHUB(vmhub));
+               (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ?
+                                        vm->root.bo->xcp_id : 0] &&
+                AMDGPU_IS_GFXHUB(vmhub));
 }
 
 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
@@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
                }
        }
        /* alloc a default reserved vmid to enforce isolation */
-       if (enforce_isolation)
-               amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
-
+       for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+               if (adev->enforce_isolation[i])
+                       amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+       }
 }
 
 /**
index 240fa675126029a0050a44cd4d5065eefcf70bef..4012fb2dd08a59960ebea95cafdbfa503de170c3 100644 (file)
@@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
                               struct amdgpu_vmid *id);
-bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
+                              struct amdgpu_vm *vm, unsigned int vmhub);
 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
                                unsigned vmhub);
 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,