drm/amdgpu: Enable retry faults unconditionally on Aldebaran
author Felix Kuehling <Felix.Kuehling@amd.com>
Thu, 11 Feb 2021 20:57:20 +0000 (15:57 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 21 Apr 2021 01:47:34 +0000 (21:47 -0400)
This is needed to allow per-process XNACK mode selection in the SQ when
booting with XNACK off by default.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Tested-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c

index 922938931e1a76143592a35d29cfcd58d454068b..4f62c78485cae4a14abb95c76cb0f330ff7469ab 100644 (file)
@@ -640,7 +640,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
 
 /**
  * DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
  * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
  */
 MODULE_PARM_DESC(noretry,
index 1e4678cb98f01fe18713c0779808b4475bd37a0d..a03fdd41212b0b980796d79c039c418b78cf3f45 100644 (file)
@@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    PAGE_TABLE_BLOCK_SIZE,
                                    block_size);
-               /* Send no-retry XNACK on fault to suppress VM fault storm. */
+               /* Send no-retry XNACK on fault to suppress VM fault storm.
+                * On Aldebaran, XNACK can be enabled in the SQ per-process.
+                * Retry faults need to be enabled for that to work.
+                */
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-                                   !adev->gmc.noretry);
+                                   !adev->gmc.noretry ||
+                                   adev->asic_type == CHIP_ALDEBARAN);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
                                    i * hub->ctx_distance, tmp);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
index 0103a5ab28e60d9c57372f1d5bf57f593ac1d670..9aaa137662b5cb62284ecea058ddea29643e1ef8 100644 (file)
@@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    PAGE_TABLE_BLOCK_SIZE,
                                    block_size);
-               /* Send no-retry XNACK on fault to suppress VM fault storm. */
+               /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+                * Retry faults need to be enabled for that to work.
+                */
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-                                   !adev->gmc.noretry);
+                                   1);
                WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
                                    i * hub->ctx_distance, tmp);
                WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,