drm/amdgpu: Optimize the enablement of GECC
author: Candice Li <candice.li@amd.com>
Tue, 11 Feb 2025 01:58:24 +0000 (09:58 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 17 Feb 2025 19:09:29 +0000 (14:09 -0500)
Enable GECC only when memory ECC is enabled by default or when
the module parameter amdgpu_ras_enable is set to 1.

v2: Add kernel messages reminding users to explicitly set
    amdgpu_ras_enable=1 before loading the driver in order to
    enable GECC, and to set amdgpu_ras_enable=0 to disable GECC
    if it is currently enabled and not needed.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

index cb24b464be2ba6555e7b7a09d72487008e01ed0d..2b1990ea9639d429e5e9a8efaefc3436af053440 100644 (file)
@@ -1154,6 +1154,7 @@ struct amdgpu_device {
        struct ratelimit_state          throttling_logging_rs;
        uint32_t                        ras_hw_enabled;
        uint32_t                        ras_enabled;
+       bool                            ras_default_ecc_enabled;
 
        bool                            no_hw_access;
        struct pci_saved_state          *pci_state;
index f873dd3cae1606a9a7844106adf0adbdcb85a012..eb015bdda8a749aa5bf99f43fd8e777f9fc1b336 100644 (file)
@@ -549,9 +549,10 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
        u16 data_offset, size;
        union umc_info *umc_info;
        u8 frev, crev;
-       bool ecc_default_enabled = false;
+       bool mem_ecc_enabled = false;
        u8 umc_config;
        u32 umc_config1;
+       adev->ras_default_ecc_enabled = false;
 
        index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
                        umc_info);
@@ -563,20 +564,22 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
                        switch (crev) {
                        case 1:
                                umc_config = le32_to_cpu(umc_info->v31.umc_config);
-                               ecc_default_enabled =
+                               mem_ecc_enabled =
                                        (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
                                break;
                        case 2:
                                umc_config = le32_to_cpu(umc_info->v32.umc_config);
-                               ecc_default_enabled =
+                               mem_ecc_enabled =
                                        (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
                                break;
                        case 3:
                                umc_config = le32_to_cpu(umc_info->v33.umc_config);
                                umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
-                               ecc_default_enabled =
+                               mem_ecc_enabled =
                                        ((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
                                         (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+                               adev->ras_default_ecc_enabled =
+                                       (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
                                break;
                        default:
                                /* unsupported crev */
@@ -585,9 +588,12 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
                } else if (frev == 4) {
                        switch (crev) {
                        case 0:
+                               umc_config = le32_to_cpu(umc_info->v40.umc_config);
                                umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
-                               ecc_default_enabled =
+                               mem_ecc_enabled =
                                        (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+                               adev->ras_default_ecc_enabled =
+                                       (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
                                break;
                        default:
                                /* unsupported crev */
@@ -599,7 +605,7 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
                }
        }
 
-       return ecc_default_enabled;
+       return mem_ecc_enabled;
 }
 
 /*
index 94f5e5e0d1d680399478783ad30e50f53c4d9fae..0f2eb69ad715bfb8ad2a4ea7a172146981332b7d 100644 (file)
@@ -1794,34 +1794,47 @@ int psp_ras_initialize(struct psp_context *psp)
                if (ret)
                        dev_warn(adev->dev, "PSP get boot config failed\n");
 
-               if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
-                       if (!boot_cfg) {
-                               dev_info(adev->dev, "GECC is disabled\n");
-                       } else {
-                               /* disable GECC in next boot cycle if ras is
-                                * disabled by module parameter amdgpu_ras_enable
-                                * and/or amdgpu_ras_mask, or boot_config_get call
-                                * is failed
-                                */
-                               ret = psp_boot_config_set(adev, 0);
-                               if (ret)
-                                       dev_warn(adev->dev, "PSP set boot config failed\n");
-                               else
-                                       dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
-                       }
+               if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+                   amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+                       dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+                       dev_warn(adev->dev,
+                               "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
                } else {
-                       if (boot_cfg == 1) {
-                               dev_info(adev->dev, "GECC is enabled\n");
+                       if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+                               amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+                               if (boot_cfg == 1) {
+                                       dev_info(adev->dev, "GECC is enabled\n");
+                               } else {
+                                       /* enable GECC in next boot cycle if it is disabled
+                                        * in boot config, or force enable GECC if failed to
+                                        * get boot configuration
+                                        */
+                                       ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+                                       if (ret)
+                                               dev_warn(adev->dev, "PSP set boot config failed\n");
+                                       else
+                                               dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+                               }
                        } else {
-                               /* enable GECC in next boot cycle if it is disabled
-                                * in boot config, or force enable GECC if failed to
-                                * get boot configuration
-                                */
-                               ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
-                               if (ret)
-                                       dev_warn(adev->dev, "PSP set boot config failed\n");
-                               else
-                                       dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+                               if (!boot_cfg) {
+                                       if (!adev->ras_default_ecc_enabled &&
+                                           amdgpu_ras_enable != 1 &&
+                                           amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+                                               dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+                                       else
+                                               dev_info(adev->dev, "GECC is disabled\n");
+                               } else {
+                                       /* disable GECC in next boot cycle if ras is
+                                        * disabled by module parameter amdgpu_ras_enable
+                                        * and/or amdgpu_ras_mask, or boot_config_get call
+                                        * is failed
+                                        */
+                                       ret = psp_boot_config_set(adev, 0);
+                                       if (ret)
+                                               dev_warn(adev->dev, "PSP set boot config failed\n");
+                                       else
+                                               dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+                               }
                        }
                }
        }