drm/amdgpu: handle old RAS eeprom data in non-nps1 mode
author ganglxie <ganglxie@amd.com>
Thu, 22 May 2025 06:28:52 +0000 (14:28 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 29 May 2025 14:57:05 +0000 (10:57 -0400)
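Some old RAS EEPROM records do not store the MCA address. For those records,
get the MCA address from the PA as recorded in NPS1 mode, then convert that
MCA address to the PA in the specific NPS mode.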

Signed-off-by: ganglxie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h

index 8a13a0c7b9cdd025b737bf52ed3bc26aa3a9cfe5..b0b1dbd4ae357a15c67a53eb89251c9cc7ecb12f 100644 (file)
@@ -2886,8 +2886,20 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
                                bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
                        return -EINVAL;
        } else {
-               if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
-                       return -EINVAL;
+               if (bps->address) {
+                       if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+                               return -EINVAL;
+               } else {
+                       /* for specific old eeprom data, mca address is not stored,
+                        * calc it from pa
+                        */
+                       if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+                               &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+                               return -EINVAL;
+
+                       if (amdgpu_ras_mca2pa(adev, bps, err_data))
+                               return -EOPNOTSUPP;
+               }
        }
 
        return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
index 8c6e55b5b967931a2e7247a2e39d4b5ff84026af..c92b8794aa73dab53424b98b31d5c624ed44fcc2 100644 (file)
@@ -562,3 +562,26 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
 
        return 0;
 }
+
+/* Convert @pa, tagged with the @nps mode it belongs to, into an MCA address
+ * stored in @mca. Inputs are zeroed since the whole struct is passed on.
+ */
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+               uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+       struct ta_ras_query_address_input addr_in = {0};
+       struct ta_ras_query_address_output addr_out = {0};
+       int ret;
+
+       /* nps: the pa belongs to */
+       addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+       addr_in.addr_type = TA_RAS_PA_TO_MCA;
+       ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+       if (ret) {
+               dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx\n", pa);
+               return ret;
+       }
+
+       *mca = addr_out.ma.err_addr;
+       return 0;
+}
index 29ce6b1d214a6c6b44f6a4a76aaa5c77692bcfd5..ec203f9e5ffab7a768c4c15fa6acf4ccf384d2e9 100644 (file)
@@ -189,4 +189,6 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
                        uint64_t err_addr, uint32_t ch, uint32_t umc,
                        uint32_t node, uint32_t socket,
                        struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+               uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
 #endif