drm/amdgpu: Add more types for boot time error reporting
author Hawking Zhang <Hawking.Zhang@amd.com>
Thu, 1 Aug 2024 05:45:27 +0000 (13:45 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 6 Aug 2024 15:10:17 +0000 (11:10 -0400)
Add boot time error reporting for data abort exceptions and unknown errors.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index 0fb2d9285834433d89c9a5a15a03b7a192506a13..9cda368ad794ce48262a711074e84d34673ffcdf 100644 (file)
@@ -4748,6 +4748,16 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
                dev_info(adev->dev,
                         "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
                         socket_id, aid_id, hbm_id, fw_status);
+
+       if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error))
+               dev_info(adev->dev,
+                        "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
+                        socket_id, aid_id, fw_status);
+
+       if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error))
+               dev_info(adev->dev,
+                        "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n",
+                        socket_id, aid_id, fw_status);
 }
 
 static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
index 7ddd13d5c06b53eaa2930013919ae38f937e343f..0d49b74bfe5ee7bb13598fd7a7a2144391fea1c7 100644 (file)
@@ -46,6 +46,8 @@ struct amdgpu_iv_entry;
 #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x)                        AMDGPU_GET_REG_FIELD(x, 10, 8)
 #define AMDGPU_RAS_GPU_ERR_AID_ID(x)                   AMDGPU_GET_REG_FIELD(x, 12, 11)
 #define AMDGPU_RAS_GPU_ERR_HBM_ID(x)                   AMDGPU_GET_REG_FIELD(x, 14, 13)
+#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x)               AMDGPU_GET_REG_FIELD(x, 29, 29)
+#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x)                  AMDGPU_GET_REG_FIELD(x, 30, 30)
 
 #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT   100
 #define AMDGPU_RAS_BOOT_STEADY_STATUS          0xBA