/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */
26 // header file of functions being implemented
27 #include "dcn32_resource.h"
28 #include "dcn20/dcn20_resource.h"
29 #include "dml/dcn32/display_mode_vba_util_32.h"
30 #include "dml/dcn32/dcn32_fpu.h"
32 static bool is_dual_plane(enum surface_pixel_format format)
34 return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
38 uint32_t dcn32_helper_mall_bytes_to_ways(
40 uint32_t total_size_in_mall_bytes)
42 uint32_t cache_lines_used, lines_per_way, total_cache_lines, num_ways;
44 /* add 2 lines for worst case alignment */
45 cache_lines_used = total_size_in_mall_bytes / dc->caps.cache_line_size + 2;
47 total_cache_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
48 lines_per_way = total_cache_lines / dc->caps.cache_num_ways;
49 num_ways = cache_lines_used / lines_per_way;
50 if (cache_lines_used % lines_per_way > 0)
56 uint32_t dcn32_helper_calculate_mall_bytes_for_cursor(
58 struct pipe_ctx *pipe_ctx,
59 bool ignore_cursor_buf)
61 struct hubp *hubp = pipe_ctx->plane_res.hubp;
62 uint32_t cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
63 uint32_t cursor_mall_size_bytes = 0;
65 switch (pipe_ctx->stream->cursor_attributes.color_format) {
66 case CURSOR_MODE_MONO:
69 case CURSOR_MODE_COLOR_1BIT_AND:
70 case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
71 case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
75 case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
76 case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
81 /* only count if cursor is enabled, and if additional allocation needed outside of the
84 if (pipe_ctx->stream->cursor_position.enable && (ignore_cursor_buf ||
85 cursor_size > 16384)) {
86 /* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
87 * Note: add 1 mblk in case of cursor misalignment
89 cursor_mall_size_bytes = ((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
90 DCN3_2_MALL_MBLK_SIZE_BYTES + 1) * DCN3_2_MALL_MBLK_SIZE_BYTES;
93 return cursor_mall_size_bytes;
97 * ********************************************************************************************
98 * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed for SubVP
100 * Gets total allocation required for the phantom viewport calculated by DML in bytes and
101 * converts to number of cache ways.
103 * @param [in] dc: current dc state
104 * @param [in] context: new dc state
106 * @return: number of ways required for SubVP
108 * ********************************************************************************************
110 uint32_t dcn32_helper_calculate_num_ways_for_subvp(
112 struct dc_state *context)
114 if (context->bw_ctx.bw.dcn.mall_subvp_size_bytes > 0) {
115 if (dc->debug.force_subvp_num_ways) {
116 return dc->debug.force_subvp_num_ways;
118 return dcn32_helper_mall_bytes_to_ways(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
125 void dcn32_merge_pipes_for_subvp(struct dc *dc,
126 struct dc_state *context)
130 /* merge pipes if necessary */
131 for (i = 0; i < dc->res_pool->pipe_count; i++) {
132 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
134 // For now merge all pipes for SubVP since pipe split case isn't supported yet
136 /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
137 if (pipe->prev_odm_pipe) {
138 /*split off odm pipe*/
139 pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
140 if (pipe->next_odm_pipe)
141 pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
143 pipe->bottom_pipe = NULL;
144 pipe->next_odm_pipe = NULL;
145 pipe->plane_state = NULL;
147 pipe->top_pipe = NULL;
148 pipe->prev_odm_pipe = NULL;
149 if (pipe->stream_res.dsc)
150 dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
151 memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
152 memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
153 } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
154 struct pipe_ctx *top_pipe = pipe->top_pipe;
155 struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
157 top_pipe->bottom_pipe = bottom_pipe;
159 bottom_pipe->top_pipe = top_pipe;
161 pipe->top_pipe = NULL;
162 pipe->bottom_pipe = NULL;
163 pipe->plane_state = NULL;
165 memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
166 memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
171 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
172 struct dc_state *context)
176 for (i = 0; i < dc->res_pool->pipe_count; i++) {
177 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
182 if (!pipe->plane_state)
188 bool dcn32_subvp_in_use(struct dc *dc,
189 struct dc_state *context)
193 for (i = 0; i < dc->res_pool->pipe_count; i++) {
194 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
196 if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE)
202 bool dcn32_mpo_in_use(struct dc_state *context)
206 for (i = 0; i < context->stream_count; i++) {
207 if (context->stream_status[i].plane_count > 1)
214 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context)
218 for (i = 0; i < dc->res_pool->pipe_count; i++) {
219 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
224 if (pipe->plane_state && pipe->plane_state->rotation != ROTATION_ANGLE_0)
230 bool dcn32_is_center_timing(struct pipe_ctx *pipe)
232 bool is_center_timing = false;
235 if (pipe->stream->timing.v_addressable != pipe->stream->dst.height ||
236 pipe->stream->timing.v_addressable != pipe->stream->src.height) {
237 is_center_timing = true;
241 if (pipe->plane_state) {
242 if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height &&
243 pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) {
244 is_center_timing = true;
248 return is_center_timing;
251 bool dcn32_is_psr_capable(struct pipe_ctx *pipe)
253 bool psr_capable = false;
255 if (pipe->stream && pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
261 #define DCN3_2_NEW_DET_OVERRIDE_MIN_MULTIPLIER 7
264 * *******************************************************************************************
265 * dcn32_determine_det_override: Determine DET allocation for each pipe
267 * This function determines how much DET to allocate for each pipe. The total number of
268 * DET segments will be split equally among each of the streams, and after that the DET
269 * segments per stream will be split equally among the planes for the given stream.
271 * If there is a plane that's driven by more than 1 pipe (i.e. pipe split), then the
272 * number of DET for that given plane will be split among the pipes driving that plane.
274 * High level algorithm:
275 * 1. Split total DET among number of streams
276 * 2. For each stream, split DET among the planes
277 * 3. For each plane, check if there is a pipe split. If yes, split the DET allocation
279 * 4. Assign the DET override to the DML pipes.
283 * For two displays that have a large difference in pixel rate, we may experience
284 * underflow on the larger display when we divide the DET equally. For this, we
285 * will implement a modified algorithm to assign more DET to larger display.
287 * 1. Calculate difference in pixel rates ( multiplier ) between two displays
288 * 2. If the multiplier exceeds DCN3_2_NEW_DET_OVERRIDE_MIN_MULTIPLIER, then
289 * implement the modified DET override algorithm.
290 * 3. Assign smaller DET size for lower pixel display and higher DET size for
291 * higher pixel display
293 * @param [in]: dc: Current DC state
294 * @param [in]: context: New DC state to be programmed
295 * @param [in]: pipes: Array of DML pipes
299 * *******************************************************************************************
301 void dcn32_determine_det_override(struct dc *dc,
302 struct dc_state *context,
303 display_e2e_pipe_params_st *pipes)
306 uint8_t pipe_plane_count, stream_segments, plane_segments, pipe_segments[MAX_PIPES] = {0};
307 uint8_t pipe_counted[MAX_PIPES] = {0};
308 uint8_t pipe_cnt = 0;
309 struct dc_plane_state *current_plane = NULL;
310 uint8_t stream_count = 0;
312 int phy_pix_clk_mult, lower_mode_stream_index;
313 int phy_pix_clk[MAX_PIPES] = {0};
314 bool use_new_det_override_algorithm = false;
316 for (i = 0; i < context->stream_count; i++) {
317 /* Don't count SubVP streams for DET allocation */
318 if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM) {
319 phy_pix_clk[i] = context->streams[i]->phy_pix_clk;
324 /* Check for special case with two displays, one with much higher pixel rate */
325 if (stream_count == 2) {
326 ASSERT((phy_pix_clk[0] > 0) && (phy_pix_clk[1] > 0));
327 if (phy_pix_clk[0] < phy_pix_clk[1]) {
328 lower_mode_stream_index = 0;
329 phy_pix_clk_mult = phy_pix_clk[1] / phy_pix_clk[0];
331 lower_mode_stream_index = 1;
332 phy_pix_clk_mult = phy_pix_clk[0] / phy_pix_clk[1];
335 if (phy_pix_clk_mult >= DCN3_2_NEW_DET_OVERRIDE_MIN_MULTIPLIER)
336 use_new_det_override_algorithm = true;
339 if (stream_count > 0) {
340 stream_segments = 18 / stream_count;
341 for (i = 0; i < context->stream_count; i++) {
342 if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
345 if (use_new_det_override_algorithm) {
346 if (i == lower_mode_stream_index)
349 stream_segments = 14;
352 if (context->stream_status[i].plane_count > 0)
353 plane_segments = stream_segments / context->stream_status[i].plane_count;
355 plane_segments = stream_segments;
356 for (j = 0; j < dc->res_pool->pipe_count; j++) {
357 pipe_plane_count = 0;
358 if (context->res_ctx.pipe_ctx[j].stream == context->streams[i] &&
359 pipe_counted[j] != 1) {
360 /* Note: pipe_plane_count indicates the number of pipes to be used for a
361 * given plane. e.g. pipe_plane_count = 1 means single pipe (i.e. not split),
362 * pipe_plane_count = 2 means 2:1 split, etc.
366 current_plane = context->res_ctx.pipe_ctx[j].plane_state;
367 for (k = 0; k < dc->res_pool->pipe_count; k++) {
368 if (k != j && context->res_ctx.pipe_ctx[k].stream == context->streams[i] &&
369 context->res_ctx.pipe_ctx[k].plane_state == current_plane) {
375 pipe_segments[j] = plane_segments / pipe_plane_count;
376 for (k = 0; k < dc->res_pool->pipe_count; k++) {
377 if (k != j && context->res_ctx.pipe_ctx[k].stream == context->streams[i] &&
378 context->res_ctx.pipe_ctx[k].plane_state == current_plane) {
379 pipe_segments[k] = plane_segments / pipe_plane_count;
386 for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
387 if (!context->res_ctx.pipe_ctx[i].stream)
389 pipes[pipe_cnt].pipe.src.det_size_override = pipe_segments[i] * DCN3_2_DET_SEG_SIZE;
393 for (i = 0; i < dc->res_pool->pipe_count; i++)
394 pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE
398 void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
399 display_e2e_pipe_params_st *pipes)
402 struct resource_context *res_ctx = &context->res_ctx;
403 struct pipe_ctx *pipe;
404 bool disable_unbounded_requesting = dc->debug.disable_z9_mpc || dc->debug.disable_unbounded_requesting;
406 for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
408 if (!res_ctx->pipe_ctx[i].stream)
411 pipe = &res_ctx->pipe_ctx[i];
415 /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all
416 * the DET available for each pipe). Use the DET override input to maintain our driver
420 pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE;
421 if (pipe->plane_state && !disable_unbounded_requesting && pipe->plane_state->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
422 if (!is_dual_plane(pipe->plane_state->format)) {
423 pipes[0].pipe.src.det_size_override = DCN3_2_DEFAULT_DET_SIZE;
424 pipes[0].pipe.src.unbounded_req_mode = true;
425 if (pipe->plane_state->src_rect.width >= 5120 &&
426 pipe->plane_state->src_rect.height >= 2880)
427 pipes[0].pipe.src.det_size_override = 320; // 5K or higher
431 dcn32_determine_det_override(dc, context, pipes);
435 * *******************************************************************************************
436 * dcn32_save_mall_state: Save MALL (SubVP) state for fast validation cases
438 * This function saves the MALL (SubVP) case for fast validation cases. For fast validation,
439 * there are situations where a shallow copy of the dc->current_state is created for the
440 * validation. In this case we want to save and restore the mall config because we always
441 * teardown subvp at the beginning of validation (and don't attempt to add it back if it's
442 * fast validation). If we don't restore the subvp config in cases of fast validation +
443 * shallow copy of the dc->current_state, the dc->current_state will have a partially
444 * removed subvp state when we did not intend to remove it.
446 * NOTE: This function ONLY works if the streams are not moved to a different pipe in the
447 * validation. We don't expect this to happen in fast_validation=1 cases.
449 * @param [in]: dc: Current DC state
450 * @param [in]: context: New DC state to be programmed
451 * @param [out]: temp_config: struct used to cache the existing MALL state
455 * *******************************************************************************************
457 void dcn32_save_mall_state(struct dc *dc,
458 struct dc_state *context,
459 struct mall_temp_config *temp_config)
463 for (i = 0; i < dc->res_pool->pipe_count; i++) {
464 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
467 temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config;
469 if (pipe->plane_state)
470 temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom;
475 * *******************************************************************************************
476 * dcn32_restore_mall_state: Restore MALL (SubVP) state for fast validation cases
478 * Restore the MALL state based on the previously saved state from dcn32_save_mall_state
480 * @param [in]: dc: Current DC state
481 * @param [in/out]: context: New DC state to be programmed, restore MALL state into here
482 * @param [in]: temp_config: struct that has the cached MALL state
486 * *******************************************************************************************
488 void dcn32_restore_mall_state(struct dc *dc,
489 struct dc_state *context,
490 struct mall_temp_config *temp_config)
494 for (i = 0; i < dc->res_pool->pipe_count; i++) {
495 struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
498 pipe->stream->mall_stream_config = temp_config->mall_stream_config[i];
500 if (pipe->plane_state)
501 pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i];
505 #define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW)
/*
 * Scaling factor for v_blank stretch calculations considering timing in
 * micro-seconds and pixel clock in 100hz.
 * Note: the parenthesis are necessary to ensure the correct order of
 * operation where V_SCALE is used.
 */
512 #define V_SCALE (10000 / MAX_STRETCHED_V_BLANK)
514 static int get_frame_rate_at_max_stretch_100hz(
515 struct dc_stream_state *fpo_candidate_stream,
516 uint32_t fpo_vactive_margin_us)
518 struct dc_crtc_timing *timing = NULL;
519 uint32_t sec_per_100_lines;
520 uint32_t max_v_blank;
521 uint32_t curr_v_blank;
522 uint32_t v_stretch_max;
523 uint32_t stretched_frame_pix_cnt;
524 uint32_t scaled_stretched_frame_pix_cnt;
525 uint32_t scaled_refresh_rate;
528 if (fpo_candidate_stream == NULL)
531 /* check if refresh rate at least 120hz */
532 timing = &fpo_candidate_stream->timing;
536 v_scale = 10000 / (MAX_STRETCHED_V_BLANK + fpo_vactive_margin_us);
538 sec_per_100_lines = timing->pix_clk_100hz / timing->h_total + 1;
539 max_v_blank = sec_per_100_lines / v_scale + 1;
540 curr_v_blank = timing->v_total - timing->v_addressable;
541 v_stretch_max = (max_v_blank > curr_v_blank) ? (max_v_blank - curr_v_blank) : (0);
542 stretched_frame_pix_cnt = (v_stretch_max + timing->v_total) * timing->h_total;
543 scaled_stretched_frame_pix_cnt = stretched_frame_pix_cnt / 10000;
544 scaled_refresh_rate = (timing->pix_clk_100hz) / scaled_stretched_frame_pix_cnt + 1;
546 return scaled_refresh_rate;
550 static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(
551 struct dc_stream_state *fpo_candidate_stream, uint32_t fpo_vactive_margin_us)
553 int refresh_rate_max_stretch_100hz;
554 int min_refresh_100hz;
556 if (fpo_candidate_stream == NULL)
559 refresh_rate_max_stretch_100hz = get_frame_rate_at_max_stretch_100hz(fpo_candidate_stream, fpo_vactive_margin_us);
560 min_refresh_100hz = fpo_candidate_stream->timing.min_refresh_in_uhz / 10000;
562 if (refresh_rate_max_stretch_100hz < min_refresh_100hz)
568 static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream)
570 int refresh_rate = 0;
572 struct dc_crtc_timing *timing = NULL;
574 if (fpo_candidate_stream == NULL)
577 /* check if refresh rate at least 120hz */
578 timing = &fpo_candidate_stream->timing;
582 h_v_total = timing->h_total * timing->v_total;
586 refresh_rate = ((timing->pix_clk_100hz * 100) / (h_v_total)) + 1;
591 * dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch - Determines if config can support FPO
593 * @param [in]: dc - current dc state
594 * @param [in]: context - new dc state
596 * Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL
598 struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context)
600 int refresh_rate = 0;
601 const int minimum_refreshrate_supported = 120;
602 struct dc_stream_state *fpo_candidate_stream = NULL;
603 bool is_fpo_vactive = false;
604 uint32_t fpo_vactive_margin_us = 0;
609 if (dc->debug.disable_fams)
612 if (!dc->caps.dmub_caps.mclk_sw)
615 if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down)
618 /* For FPO we can support up to 2 display configs if:
619 * - first display uses FPO
620 * - Second display switches in VACTIVE */
621 if (context->stream_count > 2)
623 else if (context->stream_count == 2) {
625 dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream);
629 is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, DCN3_2_MIN_ACTIVE_SWITCH_MARGIN_FPO_US);
631 if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
634 fpo_candidate_stream = context->streams[0];
636 if (!fpo_candidate_stream)
639 if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams)
642 refresh_rate = get_refresh_rate(fpo_candidate_stream);
643 if (refresh_rate < minimum_refreshrate_supported)
646 fpo_vactive_margin_us = is_fpo_vactive ? dc->debug.fpo_vactive_margin_us : 0; // For now hardcode the FPO + Vactive stretch margin to be 2000us
647 if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us))
650 // check if freesync enabled
651 if (!fpo_candidate_stream->allow_freesync)
654 if (fpo_candidate_stream->vrr_active_variable)
657 return fpo_candidate_stream;