drm/amdgpu: Generate cper records
author: Hawking Zhang <Hawking.Zhang@amd.com>
Tue, 11 Feb 2025 11:54:05 +0000 (19:54 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 17 Feb 2025 19:09:29 +0000 (14:09 -0500)
Encode the error information in CPER format and commit
it to the CPER ring.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Yang Wang <keivnyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

index ed1c20bd81147a138541b69394c75b7fbc444359..c0da9096a7fae5fc8eac79077e1385dafb5d4dc9 100644 (file)
@@ -384,6 +384,36 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
        return ret;
 }
 
+/*
+ * Generate CPER records for the @count ACA banks in @banks of SMU @type.
+ * Best effort: returns nothing; helper failures are only reported by dev_warn().
+ */
+static void aca_banks_generate_cper(struct amdgpu_device *adev, enum aca_smu_type type,
+                                   struct aca_banks *banks, int count)
+{
+       struct aca_bank_node *node;
+
+       if (!adev) /* no device to warn against: adev->dev would be a NULL deref */
+               return;
+       if (!banks || !count) {
+               dev_warn(adev->dev, "fail to generate cper records\n");
+               return;
+       }
+
+       /* UEs must be encoded into separate CPER entries */
+       if (type == ACA_SMU_TYPE_UE) {
+               list_for_each_entry(node, &banks->list, node) {
+                       if (amdgpu_cper_generate_ue_record(adev, &node->bank))
+                               dev_warn(adev->dev, "fail to generate ue cper records\n");
+               }
+               return;
+       }
+
+       /* SMU_TYPE_CE banks (CEs, DEs or both) are combined into one CPER entry */
+       if (amdgpu_cper_generate_ce_records(adev, banks, count))
+               dev_warn(adev->dev, "fail to generate ce cper records\n");
+}
+
 static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
                            bank_handler_t handler, struct ras_query_context *qctx, void *data)
 {
@@ -421,6 +451,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
        if (ret)
                goto err_release_banks;
 
+       aca_banks_generate_cper(adev, type, &banks, count);
+
 err_release_banks:
        aca_banks_release(&banks);