drm/amdgpu: add the fan abnormal detection feature
author lyndonli <Lyndon.Li@amd.com>
Mon, 21 Nov 2022 01:10:20 +0000 (09:10 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 23 Nov 2022 14:47:15 +0000 (09:47 -0500)
Update the SW CTF limit, using the existing THM_THERMAL_INT_CTRL register,
when a fan failure is detected via an SMU interrupt.

Signed-off-by: lyndonli <Lyndon.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

index 44bbf17e4bef11f232c8ca04899e5b8b0362ec88..3bc4128a22ac2d6b4c411d1e6bdca12091309a83 100644 (file)
@@ -168,6 +168,7 @@ struct smu_temperature_range {
        int mem_crit_max;
        int mem_emergency_max;
        int software_shutdown_temp;
+       int software_shutdown_temp_offset;
 };
 
 struct smu_state_validation_block {
index 89f0f6eb19f3d12d7077a436961a0d1841c208e7..5a905002252d7d9e45921b2881d05ab4f0e53ee2 100644 (file)
@@ -1376,6 +1376,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
         */
        uint32_t ctxid = entry->src_data[0];
        uint32_t data;
+       uint32_t high;
 
        if (client_id == SOC15_IH_CLIENTID_THM) {
                switch (src_id) {
@@ -1432,6 +1433,33 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
                                        schedule_work(&smu->throttling_logging_work);
 
                                break;
+                       case 0x8:
+                               high = smu->thermal_range.software_shutdown_temp +
+                                       smu->thermal_range.software_shutdown_temp_offset;
+                               high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high);
+                               dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n",
+                                                       high,
+                                                       smu->thermal_range.software_shutdown_temp_offset);
+
+                               data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+                               data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+                                                       DIG_THERM_INTH,
+                                                       (high & 0xff));
+                               data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+                               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+                               break;
+                       case 0x9:
+                               high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+                                       smu->thermal_range.software_shutdown_temp);
+                               dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high);
+
+                               data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+                               data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+                                                       DIG_THERM_INTH,
+                                                       (high & 0xff));
+                               data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+                               WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+                               break;
                        }
                }
        }
index d74debc584f89abec9bba396e8988d7d8b0573c0..c3c9ef523e59d0269799dcd8b10267500c01b51a 100644 (file)
@@ -1223,6 +1223,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu,
        range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)*
                SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
        range->software_shutdown_temp = powerplay_table->software_shutdown_temp;
+       range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset;
 
        return 0;
 }