drm/amdgpu: adjust aca init/fini sequence to match gpu reset
author Yang Wang <kevinyang.wang@amd.com>
Wed, 24 Jan 2024 02:15:10 +0000 (10:15 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 25 Jan 2024 19:58:02 +0000 (14:58 -0500)
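- Move the ACA init/fini calls out of device init/fini and into RAS late
  init/fini, so that ACA setup and teardown follow the GPU reset sequence.
- Add a new function, amdgpu_aca_reset(), which finalizes and then
  re-initializes ACA; RAS late init calls it when a GPU reset is in progress.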

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index bf1ca6839406341b3671961b3e09da3eeb6ccf37..bb9e0612928ebf378a849e877a6169564d253843 100644 (file)
@@ -682,6 +682,13 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
        aca_manager_fini(&aca->mgr);
 }
 
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+       amdgpu_aca_fini(adev);
+
+       return amdgpu_aca_init(adev);
+}
+
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
 {
        struct amdgpu_aca *aca = &adev->aca;
index 6e9a35eda68309a58b6d7d7e5b29579e98b163d8..2da50e0958831e897ca7fb5893810c8328162303 100644 (file)
@@ -185,6 +185,7 @@ struct aca_info {
 
 int amdgpu_aca_init(struct amdgpu_device *adev);
 void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
 void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
 bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
 
index 1a04ccba9542a3af5cb97cda9d499ba4075b0704..9689756bf9f5f8d27108cda07f2ee940b0224e92 100644 (file)
@@ -4048,10 +4048,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
        amdgpu_device_get_pcie_info(adev);
 
-       r = amdgpu_aca_init(adev);
-       if (r)
-               return r;
-
        r = amdgpu_device_get_job_timeout_settings(adev);
        if (r) {
                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
@@ -4448,8 +4444,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
        amdgpu_reset_fini(adev);
 
-       amdgpu_aca_fini(adev);
-
        /* free i2c buses */
        if (!amdgpu_device_has_dc_support(adev))
                amdgpu_i2c_fini(adev);
index 80816c4ec1f1b2f8c2708e1fa28f92d709fb4750..ebcd1cb60052207c168f56db3e240d713b9fb078 100644 (file)
@@ -3344,10 +3344,18 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev))
                return 0;
 
-       if (amdgpu_aca_is_enabled(adev))
+       if (amdgpu_aca_is_enabled(adev)) {
+               if (amdgpu_in_reset(adev))
+                       r = amdgpu_aca_reset(adev);
+                else
+                       r = amdgpu_aca_init(adev);
+               if (r)
+                       return r;
+
                amdgpu_ras_set_aca_debug_mode(adev, false);
-       else
+       } else {
                amdgpu_ras_set_mca_debug_mode(adev, false);
+       }
 
        list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
                obj = node->ras_obj;
@@ -3416,6 +3424,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
        amdgpu_ras_fs_fini(adev);
        amdgpu_ras_interrupt_remove_all(adev);
 
+       if (amdgpu_aca_is_enabled(adev))
+               amdgpu_aca_fini(adev);
+
        WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
 
        if (AMDGPU_RAS_GET_FEATURES(con->features))