drm/amd/display: Add new message for DF throttling optimization on dcn401
authorDillon Varone <dillon.varone@amd.com>
Wed, 27 Nov 2024 22:22:36 +0000 (17:22 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 18 Dec 2024 17:19:40 +0000 (12:19 -0500)
[WHY]
When effective bandwidth from the SoC is enough to perform SubVP
prefetches, then DF throttling is not required.

[HOW]
Provide SMU the required clocks for which DF throttling is not required.

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Reviewed-by: Alvin Lee <alvin.lee2@amd.com>
Signed-off-by: Dillon Varone <dillon.varone@amd.com>
Signed-off-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c

index 8cfc5f4359374d1d93b5b66405d2c71f630fcc0c..5b4e1e8a9ae204e9479ec8dc459bac5f2c3c8e77 100644 (file)
@@ -141,6 +141,20 @@ static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, P
        return ppclk_idle_dpm_enabled;
 }
 
+static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr)
+{
+       bool is_df_throttle_opt_enabled = false;
+
+       if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+                       clk_mgr->smu_ver >= 0x663500) {
+               is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle;
+       }
+
+       is_df_throttle_opt_enabled &= clk_mgr->smu_present;
+
+       return is_df_throttle_opt_enabled;
+}
+
 /* Query SMU for all clock states for a particular clock */
 static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
                unsigned int *num_levels)
@@ -869,6 +883,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned
                                        params->update_idle_hardmin_params.uclk_mhz,
                                        params->update_idle_hardmin_params.fclk_mhz);
                        break;
+               case CLK_MGR401_UPDATE_SUBVP_HARDMINS:
+                       dcn401_smu_set_subvp_uclk_fclk_hardmin(
+                                       clk_mgr_internal,
+                                       params->update_idle_hardmin_params.uclk_mhz,
+                                       params->update_idle_hardmin_params.fclk_mhz);
+                       break;
                case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK:
                        dcn401_smu_set_min_deep_sleep_dcef_clk(
                                        clk_mgr_internal,
@@ -945,15 +965,21 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
        bool update_active_uclk = false;
        bool update_idle_fclk = false;
        bool update_idle_uclk = false;
+       bool update_subvp_prefetch_dramclk = false;
+       bool update_subvp_prefetch_fclk = false;
        bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
                        dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) &&
                        dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
                        dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK);
+       bool is_df_throttle_opt_enabled = is_idle_dpm_enabled &&
+               dcn401_is_df_throttle_opt_enabled(clk_mgr_internal);
        int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
        int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
        int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
        int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
        int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+       int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+       int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
 
        unsigned int num_steps = 0;
 
@@ -1109,6 +1135,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                }
        }
 
+       if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) {
+               clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz;
+               update_subvp_prefetch_dramclk = true;
+               subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+       }
+
        /* FCLK */
        /* Always update saved value, even if new value not set due to P-State switching unsupported */
        if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
@@ -1129,6 +1161,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                }
        }
 
+       if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) {
+               clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz;
+               update_subvp_prefetch_fclk = true;
+               subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+       }
+
        /* When idle DPM is enabled, need to send active and idle hardmins separately */
        /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */
        if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) {
@@ -1146,6 +1184,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                num_steps++;
        }
 
+       /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */
+       if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) {
+               block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz;
+               block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz;
+               block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS;
+               num_steps++;
+       }
+
        /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
        if (update_active_uclk || update_idle_uclk) {
                if (!is_idle_dpm_enabled) {
index 8b0461992b2296353700ffefa7cc9e93850628e2..6c9ae5ca2c7e96975e384d3b76799ffd65c839d4 100644 (file)
@@ -90,6 +90,7 @@ enum dcn401_clk_mgr_block_sequence_func {
        CLK_MGR401_UPDATE_DTBCLK_DTO,
        CLK_MGR401_UPDATE_DENTIST,
        CLK_MGR401_UPDATE_PSR_WAIT_LOOP,
+       CLK_MGR401_UPDATE_SUBVP_HARDMINS,
 };
 
 struct dcn401_clk_mgr_block_sequence {
index 7700477d019b080200c405274e9d2b06ec75eb1a..b02a41179b41dd938335101933213a2c2d3b4db6 100644 (file)
 
 #define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
 
+/* temporary define */
+#ifndef DALSMC_MSG_SubvpUclkFclk
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#endif
+
 /*
  * Function to be used instead of REG_WAIT macro because the wait ends when
  * the register is NOT EQUAL to zero, and because the translation in msg_if.h
@@ -296,6 +301,24 @@ bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
        return success;
 }
 
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+               uint16_t uclk_freq_mhz,
+               uint16_t fclk_freq_mhz)
+{
+       uint32_t response = 0;
+       bool success;
+
+       /* 15:0 for uclk, 31:16 for fclk */
+       uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+       smu_print("SMU Set subvp hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+       success = dcn401_smu_send_msg_with_param(clk_mgr,
+                       DALSMC_MSG_SubvpUclkFclk, param, &response);
+
+       return success;
+}
+
 void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz)
 {
        smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz);
index 651fb8d6286455f2e9e9f1aebb51852dc9ee6b55..42cf7885a7cb01500c0c595e9b898517ec9025a5 100644 (file)
@@ -23,6 +23,9 @@ bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
 bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
                uint16_t uclk_freq_mhz,
                uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+               uint16_t uclk_freq_mhz,
+               uint16_t fclk_freq_mhz);
 void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
 void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
 
index 961b8245523cca8235d9420b2d89bf90731de7a9..aef70bcde3558668d380507f758cf6f1d0b25f52 100644 (file)
@@ -629,6 +629,8 @@ struct dc_clocks {
        int bw_dispclk_khz;
        int idle_dramclk_khz;
        int idle_fclk_khz;
+       int subvp_prefetch_dramclk_khz;
+       int subvp_prefetch_fclk_khz;
 };
 
 struct dc_bw_validation_profile {
@@ -1072,6 +1074,7 @@ struct dc_debug_options {
        bool skip_full_updated_if_possible;
        unsigned int enable_oled_edp_power_up_opt;
        bool enable_hblank_borrow;
+       bool force_subvp_df_throttle;
 };
 
 
index 730bf35e6043ad86311be26f68adbfb9fc99eb01..efb09990549658fa16d302d32cc9d740f52fdb27 100644 (file)
@@ -1077,6 +1077,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state
        context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
        context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
        context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz;
+       context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
+       context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
 }
 
 void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx)
index b2ae6232673b317d9f5fd67a8273ac23aacf57c3..d2d053f2354d00998e8cd656ce7595e67077a612 100644 (file)
@@ -387,6 +387,11 @@ struct dml2_display_cfg_programming {
                                unsigned long fclk_khz;
                                unsigned long dcfclk_khz;
                        } svp_prefetch;
+                       struct {
+                               unsigned long uclk_khz;
+                               unsigned long fclk_khz;
+                               unsigned long dcfclk_khz;
+                       } svp_prefetch_no_throttle;
 
                        unsigned long deepsleep_dcfclk_khz;
                        unsigned long dispclk_khz;
index 009026950b6c8ea0dbcd57d08b5713683eb0b626..8a78b9adfc623432ef4f01f27f304f36e00f2013 100644 (file)
@@ -96,6 +96,7 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
        double min_uclk_latency;
        const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
 
+       /* assumes DF throttling is enabled */
        min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
        min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100);
 
@@ -125,6 +126,37 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
        in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
        in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
        in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+
+       /* assumes DF throttling is disabled */
+       min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+       min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100);
+
+       min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+       min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100);
+
+       min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+
+       min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+       min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100);
+
+       min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+       min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100);
+
+       min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+
+       min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+       min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100);
+
+       min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+       min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100);
+
+       min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+
+       get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
 }
 
 static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
@@ -272,6 +304,17 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr
        if (result)
                result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk);
 
+       /* these clocks are optional, so they can fail to map, in which case map all to 0 */
+       if (result) {
+               if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) ||
+                               !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) ||
+                               !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) {
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0;
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0;
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0;
+               }
+       }
+
        return result;
 }