drm/amdgpu: enable BACO reset for SMU7 based dGPUs (v2)
author Alex Deucher <alexander.deucher@amd.com>
Mon, 11 Mar 2019 23:05:12 +0000 (18:05 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 15 Oct 2019 19:55:32 +0000 (15:55 -0400)
Use BACO to reset the GPU if supported on SMU7 based
dGPUs.

v2: don't use baco on CI parts

Reviewed-by: Evan Quan <evan.quan@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/cik.h
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdgpu/vi.h

index b81bb414fcb300a98312fa7d60c5ced4c72d05f7..fc8b34480f66da2acbf91d00df667eeda6255423 100644 (file)
@@ -1270,15 +1270,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 }
 
 /**
- * cik_asic_reset - soft reset GPU
+ * cik_asic_pci_config_reset - soft reset GPU
  *
  * @adev: amdgpu_device pointer
  *
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
  * Returns 0 for success.
  */
-static int cik_asic_reset(struct amdgpu_device *adev)
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
 {
        int r;
 
@@ -1294,7 +1294,45 @@ static int cik_asic_reset(struct amdgpu_device *adev)
 static enum amd_reset_method
 cik_asic_reset_method(struct amdgpu_device *adev)
 {
-       return AMD_RESET_METHOD_LEGACY;
+       /*
+        * Report the reset method for @adev. Every branch below currently
+        * leaves baco_reset false, so the result is always
+        * AMD_RESET_METHOD_LEGACY.
+        */
+       bool baco_reset;
+
+       switch (adev->asic_type) {
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+               /*
+                * disable baco reset until it works: the capability query
+                * below stays commented out and baco_reset is forced false
+                */
+               /* smu7_asic_get_baco_capability(adev, &baco_reset); */
+               baco_reset = false;
+               break;
+       default:
+               baco_reset = false;
+               break;
+       }
+
+       if (baco_reset)
+               return AMD_RESET_METHOD_BACO;
+       else
+               return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Ask cik_asic_reset_method() which method applies, then run a BACO
+ * reset for AMD_RESET_METHOD_BACO or a PCI config reset for anything else.
+ *
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+       if (cik_asic_reset_method(adev) != AMD_RESET_METHOD_BACO)
+               return cik_asic_pci_config_reset(adev);
+
+       return smu7_asic_baco_reset(adev);
 }
 
 static u32 cik_get_config_memsize(struct amdgpu_device *adev)
index 54c625a2e57054568f3498236186822cc55ed2d3..9870bf27870e060dbc5b22ad416782a8bbafd69b 100644 (file)
@@ -31,4 +31,7 @@ void cik_srbm_select(struct amdgpu_device *adev,
 int cik_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
+/* BACO helpers used by the CIK and VI reset paths; defined in vi.c */
+int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
+int smu7_asic_baco_reset(struct amdgpu_device *adev);
+
 #endif
index 5f8c8786cac51d22200bfb21a14653b2a67c60a5..78e5cdc0c05887307eb2e5864619207a7b870d92 100644 (file)
@@ -689,16 +689,50 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
        return -EINVAL;
 }
 
+/**
+ * smu7_asic_get_baco_capability - query powerplay for BACO support
+ *
+ * @adev: amdgpu_device pointer
+ * @cap: receives true only when the powerplay callback succeeds and
+ *       reports BACO support; false on every other path
+ *
+ * @cap is cleared before the callback runs and again if the callback
+ * fails, so a caller that ignores the return value never reads a flag
+ * the callback left unwritten.
+ *
+ * Returns -ENOENT when no get_asic_baco_capability callback is
+ * available, otherwise the callback's return value.
+ */
+int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
+{
+       void *pp_handle = adev->powerplay.pp_handle;
+       const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+       int r;
+
+       /* default to "not capable" before anything can bail out */
+       *cap = false;
+
+       if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
+               return -ENOENT;
+
+       r = pp_funcs->get_asic_baco_capability(pp_handle, cap);
+       if (r)
+               *cap = false;
+
+       return r;
+}
+
+/**
+ * smu7_asic_baco_reset - reset the GPU by cycling it through BACO
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enter the BACO state and leave it again through the powerplay
+ * set_asic_baco_state callback.
+ *
+ * Returns 0 for success, -ENOENT when the powerplay BACO callbacks are
+ * missing, -EIO when entering or exiting BACO fails.
+ */
+int smu7_asic_baco_reset(struct amdgpu_device *adev)
+{
+       const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
+       void *handle = adev->powerplay.pp_handle;
+
+       if (!funcs)
+               return -ENOENT;
+
+       /* both callbacks are required, though only the setter is called */
+       if (!funcs->get_asic_baco_state || !funcs->set_asic_baco_state)
+               return -ENOENT;
+
+       /* state 1: enter BACO */
+       if (funcs->set_asic_baco_state(handle, 1) != 0)
+               return -EIO;
+
+       /* state 0: exit BACO */
+       if (funcs->set_asic_baco_state(handle, 0) != 0)
+               return -EIO;
+
+       dev_info(adev->dev, "GPU BACO reset\n");
+
+       return 0;
+}
+
 /**
- * vi_asic_reset - soft reset GPU
+ * vi_asic_pci_config_reset - soft reset GPU
  *
  * @adev: amdgpu_device pointer
  *
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
  * Returns 0 for success.
  */
-static int vi_asic_reset(struct amdgpu_device *adev)
+static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
 {
        int r;
 
@@ -714,7 +748,47 @@ static int vi_asic_reset(struct amdgpu_device *adev)
 static enum amd_reset_method
 vi_asic_reset_method(struct amdgpu_device *adev)
 {
-       return AMD_RESET_METHOD_LEGACY;
+       bool baco_reset;
+
+       switch (adev->asic_type) {
+       case CHIP_FIJI:
+       case CHIP_TONGA:
+       case CHIP_POLARIS10:
+       case CHIP_POLARIS11:
+       case CHIP_POLARIS12:
+       case CHIP_TOPAZ:
+               smu7_asic_get_baco_capability(adev, &baco_reset);
+               break;
+       default:
+               baco_reset = false;
+               break;
+       }
+
+       if (baco_reset)
+               return AMD_RESET_METHOD_BACO;
+       else
+               return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * vi_asic_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Ask vi_asic_reset_method() which method applies, then run a BACO
+ * reset for AMD_RESET_METHOD_BACO or a PCI config reset for anything else.
+ *
+ * Returns 0 for success.
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+       if (vi_asic_reset_method(adev) != AMD_RESET_METHOD_BACO)
+               return vi_asic_pci_config_reset(adev);
+
+       return smu7_asic_baco_reset(adev);
 }
 
 static u32 vi_get_config_memsize(struct amdgpu_device *adev)
index 8de0772f986c53ea469d2cffe5c4ddb098f9f302..40d4174913a41d12107c626a52e04fa9c07f7bc6 100644 (file)
@@ -31,4 +31,7 @@ void vi_srbm_select(struct amdgpu_device *adev,
 int vi_set_ip_blocks(struct amdgpu_device *adev);
 
 void legacy_doorbell_index_init(struct amdgpu_device *adev);
+/* BACO helpers used by the CIK and VI reset paths; defined in vi.c */
+int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
+int smu7_asic_baco_reset(struct amdgpu_device *adev);
+
 #endif