2 * Copyright 2022 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
/*
 * dml32_dscceComputeDelay() - DSC encoder (dscce) delay estimate.
 *
 * Derives an initial transmit delay from rcModelSize, BPP and the number of
 * pixels handled per clock, combines it with the slice width as seen by the
 * DSC and with numSlices into a cycle count (Delay), and converts that count
 * to pixels (Delay * 3 * pixelsPerClock).
 *
 * Output is only referenced by the debug print in the lines shown here.
 *
 * NOTE(review): this listing appears to be missing lines. bpc and BPP are
 * used below but their parameter declarations are not shown; the
 * assignments to pixelsPerClock, D, s, wx, p, l0, a and lstall, the
 * declarations of Delay and pixels, and the return statement are not
 * visible either. Presumably the function returns `pixels` -- confirm
 * against the complete file.
 */
30 unsigned int dml32_dscceComputeDelay(
33 unsigned int sliceWidth,
34 unsigned int numSlices,
35 enum output_format_class pixelFormat,
36 enum output_encoder_class Output)
38 // valid bpc = source bits per component in the set of {8, 10, 12}
39 // valid bpp = increments of 1/16 of a bit
40 // min = 6/7/8 in N420/N422/444, respectively
41 // max = such that compression is 1:1
42 //valid sliceWidth = number of pixels per slice line,
43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 unsigned int rcModelSize = 8192;
50 // N422/N420 operate at 2 pixels per clock
51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
// Select pixelsPerClock by format; the assignments under each arm are not
// visible in this listing (see the surrounding comments for the intended values).
54 if (pixelFormat == dm_420)
56 else if (pixelFormat == dm_n422)
58 // #all other modes operate at 1 pixel per clock
62 //initial transmit delay as per PPS
// The 2.0 literal forces floating-point division before dml_round() is applied.
63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
73 //divide by pixel per cycle to compute slice width as seen by DSC
// Both operands are unsigned int, so any remainder is discarded.
74 w = sliceWidth / pixelsPerClock;
76 //422 mode has an additional cycle of delay
// The condition matches dm_420, dm_444 and dm_n422; the values given to `s`
// for the matching and non-matching cases are not visible in this listing.
77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
82 //main calculation for the dscce
83 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer ceiling of a / 3.
88 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
89 L = (ax + wx - 1) / wx;
// NOTE(review): `ix % w` requires w != 0, i.e. sliceWidth >= pixelsPerClock.
90 if ((ix % w) == 0 && p != 0)
// numSlices is unsigned, so numSlices - 1 relies on numSlices >= 1
// (the valid set documented above is {1, 2, 3, 4}).
94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97 pixels = Delay * 3 * pixelsPerClock;
// Debug-only trace of the inputs and the result; the closing #endif is not
// visible in this listing.
99 #ifdef __DML_VBA_DEBUG__
100 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105 dml_print("DML::%s: Output: %d\n", __func__, Output);
106 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
/*
 * dml32_dscComputeDelay() - accumulate a DSC delay figure into `Delay`,
 * which starts at 0, using one of three format-dependent branches. The
 * comments inside each branch name the pipeline stages being accounted for.
 *
 * Output is not referenced by any line shown here.
 *
 * NOTE(review): the statements that add each stage's contribution to
 * `Delay`, the final `else` line and the return statement are not visible
 * in this listing; only the branch conditions and stage comments remain.
 */
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
114 unsigned int Delay = 0;
// Branch 1: dm_420.
116 if (pixelFormat == dm_420) {
121 // dscc - input deserializer
123 // dscc gets pixels every other cycle
125 // dscc - input cdc fifo
127 // dscc gets pixels every other cycle
129 // dscc - cdc uncertainty
131 // dscc - output cdc fifo
133 // dscc gets pixels every other cycle
135 // dscc - cdc uncertainty
137 // dscc - output serializer
// Branch 2: assuming dm_n422 and dm_444 are distinct enumerators, the
// condition below is logically equivalent to `pixelFormat != dm_444`, so
// this branch takes every format other than dm_420 (handled above) and
// dm_444. The remaining branch is therefore reached only for dm_444.
141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
146 // dscc - input deserializer
148 // dscc - input cdc fifo
150 // dscc - cdc uncertainty
152 // dscc - output cdc fifo
154 // dscc - cdc uncertainty
156 // dscc - output serializer
// Branch 3 (its `else` line is not visible in this listing).
165 // dscc - input deserializer
167 // dscc - input cdc fifo
169 // dscc - cdc uncertainty
171 // dscc - output cdc fifo
173 // dscc - output serializer
175 // dscc - cdc uncertainty
/*
 * IsVertical() - test whether a rotation angle is one of the 90/270 degree
 * variants (dm_rotation_90, dm_rotation_90m, dm_rotation_270,
 * dm_rotation_270m).
 *
 * NOTE(review): the statement executed when the test succeeds and the
 * return statement are not visible in this listing; presumably is_vert is
 * set to true and returned -- confirm against the complete file.
 */
185 bool IsVertical(enum dm_rotation_angle Scan)
// Default result when none of the rotations below match.
187 bool is_vert = false;
189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
/*
 * dml32_CalculateSinglePipeDPPCLKAndSCLThroughput() - compute scaler
 * throughput for luma and chroma and the DPP clock value for a single DPP.
 *
 * Results are written through the three pointer parameters:
 *   *PSCL_THROUGHPUT         luma scaler throughput
 *   *PSCL_THROUGHPUT_CHROMA  chroma scaler throughput (0 when the format is
 *                            not one of dm_420_8/10/12 or dm_rgbe_alpha)
 *   *DPPCLKUsingSingleDPP    the luma value, or the larger of the luma and
 *                            chroma values when chroma is considered
 *
 * NOTE(review): this listing appears to be missing lines. HRatio,
 * HRatioChroma, VRatio, VRatioChroma, PixelClock, HTaps and VTaps are used
 * below but their parameter declarations are not shown, and the `if`/`else`
 * lines around the two *PSCL_THROUGHPUT assignments are not visible.
 */
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
201 double MaxDCHUBToPSCLThroughput,
202 double MaxPSCLToLBThroughput,
204 enum source_format_class SourcePixelFormat,
206 unsigned int HTapsChroma,
208 unsigned int VTapsChroma,
211 double *PSCL_THROUGHPUT,
212 double *PSCL_THROUGHPUT_CHROMA,
213 double *DPPCLKUsingSingleDPP)
215 double DPPCLKUsingSingleDPPLuma;
216 double DPPCLKUsingSingleDPPChroma;
// Two alternative luma throughput formulas follow. The condition choosing
// between them is not visible here; the chroma code below uses the
// equivalent pair under `HRatioChroma > 1`, so presumably this one is
// `HRatio > 1` -- TODO confirm.
219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220 dml_ceil((double) HTaps / 6.0, 1.0));
222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// NOTE(review): `VTaps / 6` has no cast, unlike `(double) HTaps / 6.0`
// above. If VTaps is an integer type (its declaration is not visible;
// HTapsChroma and VTapsChroma are unsigned int) this is a truncating
// integer division -- confirm that is intended.
225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226 *PSCL_THROUGHPUT, 1);
// More than 6 taps in either direction imposes a floor of 2 * PixelClock.
228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
// Formats other than the 4:2:0 ones and dm_rgbe_alpha: no chroma
// contribution, so the luma result is used directly.
231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232 SourcePixelFormat != dm_rgbe_alpha)) {
233 *PSCL_THROUGHPUT_CHROMA = 0;
234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
// Otherwise repeat the luma calculation with the chroma ratios and taps
// (the `else` lines are not visible in this listing).
236 if (HRatioChroma > 1) {
237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
// VTapsChroma is unsigned int, so `VTapsChroma / 6` truncates.
242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
// The final value must satisfy both planes.
246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
/*
 * dml32_CalculateBytePerPixelAndBlockSizes() - derive per-format byte sizes
 * and the 256-byte block and macro tile dimensions for the luma (Y) and
 * chroma (C) planes.
 *
 * All results are written through the pointer parameters:
 *   BytePerPixelDETY/C            byte-per-pixel values as doubles (the last
 *                                 format branch uses 4/3 and 8/3)
 *   BlockHeight256BytesY/C,
 *   BlockWidth256BytesY/C         height and width of a 256-byte block, with
 *                                 width = 256 / BytePerPixel / height
 *   MacroTileHeightY/C,
 *   MacroTileWidthY/C             macro tile dimensions, selected by
 *                                 SurfaceTiling
 *
 * NOTE(review): this listing appears to be missing lines. The assignments
 * to *BytePerPixelY and *BytePerPixelC are not visible although both values
 * are printed and used as divisors below, and several `else` / `#endif`
 * lines are absent.
 */
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251 enum source_format_class SourcePixelFormat,
252 enum dm_swizzle_mode SurfaceTiling,
255 unsigned int *BytePerPixelY,
256 unsigned int *BytePerPixelC,
257 double *BytePerPixelDETY,
258 double *BytePerPixelDETC,
259 unsigned int *BlockHeight256BytesY,
260 unsigned int *BlockHeight256BytesC,
261 unsigned int *BlockWidth256BytesY,
262 unsigned int *BlockWidth256BytesC,
263 unsigned int *MacroTileHeightY,
264 unsigned int *MacroTileHeightC,
265 unsigned int *MacroTileWidthY,
266 unsigned int *MacroTileWidthC)
// Step 1: byte-per-pixel values by source format. A chroma value of 0 is
// used for the 4:4:4 formats and dm_rgbe.
268 if (SourcePixelFormat == dm_444_64) {
269 *BytePerPixelDETY = 8;
270 *BytePerPixelDETC = 0;
273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274 *BytePerPixelDETY = 4;
275 *BytePerPixelDETC = 0;
278 } else if (SourcePixelFormat == dm_444_16) {
279 *BytePerPixelDETY = 2;
280 *BytePerPixelDETC = 0;
283 } else if (SourcePixelFormat == dm_444_8) {
284 *BytePerPixelDETY = 1;
285 *BytePerPixelDETC = 0;
288 } else if (SourcePixelFormat == dm_rgbe_alpha) {
289 *BytePerPixelDETY = 4;
290 *BytePerPixelDETC = 1;
293 } else if (SourcePixelFormat == dm_420_8) {
294 *BytePerPixelDETY = 1;
295 *BytePerPixelDETC = 2;
298 } else if (SourcePixelFormat == dm_420_12) {
299 *BytePerPixelDETY = 2;
300 *BytePerPixelDETC = 4;
// Remaining formats (the `else` line is not visible). Presumably this is
// dm_420_10, the only 4:2:0 format not tested above -- TODO confirm.
304 *BytePerPixelDETY = 4.0 / 3;
305 *BytePerPixelDETC = 8.0 / 3;
309 #ifdef __DML_VBA_DEBUG__
310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
// Step 2: 256-byte block dimensions. For the formats listed in this
// condition only the luma block is sized; chroma height and width are 0.
316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317 || SourcePixelFormat == dm_444_16
318 || SourcePixelFormat == dm_444_8
319 || SourcePixelFormat == dm_mono_16
320 || SourcePixelFormat == dm_mono_8
321 || SourcePixelFormat == dm_rgbe)) {
322 if (SurfaceTiling == dm_sw_linear)
323 *BlockHeight256BytesY = 1;
324 else if (SourcePixelFormat == dm_444_64)
325 *BlockHeight256BytesY = 4;
326 else if (SourcePixelFormat == dm_444_8)
327 *BlockHeight256BytesY = 16;
// Remaining case of the chain above (its `else` line is not visible).
329 *BlockHeight256BytesY = 8;
// Width follows from the 256-byte budget: 256 / bytes-per-pixel / height.
331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332 *BlockHeight256BytesC = 0;
333 *BlockWidth256BytesC = 0;
// All other formats (the `else` line is not visible): both planes are sized.
335 if (SurfaceTiling == dm_sw_linear) {
336 *BlockHeight256BytesY = 1;
337 *BlockHeight256BytesC = 1;
338 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339 *BlockHeight256BytesY = 8;
340 *BlockHeight256BytesC = 16;
341 } else if (SourcePixelFormat == dm_420_8) {
342 *BlockHeight256BytesY = 16;
343 *BlockHeight256BytesC = 8;
345 *BlockHeight256BytesY = 8;
346 *BlockHeight256BytesC = 8;
// NOTE(review): the chroma width divides by *BytePerPixelC, so this path
// relies on it being non-zero for these formats; its assignment is not
// visible in this listing.
348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
351 #ifdef __DML_VBA_DEBUG__
352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
// Step 3: macro tile dimensions. In every variant the chroma width is
// forced to 0 when the chroma height is 0, which avoids dividing by zero.
// Linear: the macro tile has the 256-byte block's height and a 256-byte budget.
358 if (SurfaceTiling == dm_sw_linear) {
359 *MacroTileHeightY = *BlockHeight256BytesY;
360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361 *MacroTileHeightC = *BlockHeight256BytesC;
362 if (*MacroTileHeightC == 0)
363 *MacroTileWidthC = 0;
365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
// The dm_sw_64kb_* modes listed here: 16 block heights tall, 65536-byte budget.
366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371 if (*MacroTileHeightC == 0)
372 *MacroTileWidthC = 0;
374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
// Any other tiling mode (the `else` line is not visible): 32 block heights
// tall with a 65536 * 4 byte budget.
376 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379 if (*MacroTileHeightC == 0)
380 *MacroTileWidthC = 0;
382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 #ifdef __DML_VBA_DEBUG__
386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
391 } // CalculateBytePerPixelAndBlockSizes
/*
 * dml32_CalculateSwathAndDETConfiguration() - choose swath heights and
 * split the DET buffer between luma and chroma for every active surface.
 *
 * Steps visible in this listing:
 *   1. dml32_CalculateSwathWidth() is called; it receives the swath width
 *      and maximum swath height arrays used below.
 *   2. The maximum swath size in bytes is computed per surface from
 *      swath_width_*_ub, BytePerPixDET* and MaximumSwathHeight*.
 *   3. The total active DPP count and the presence of chroma surfaces are
 *      collected, and the compressed-buffer reserved space is derived.
 *   4. dml32_UnboundedRequest() and dml32_CalculateDETBufferSize() are
 *      called to decide unbounded requesting and the per-surface DET size.
 *   5. Per surface, full or halved swath heights are selected, viewport
 *      support is flagged, and the DET is divided between the two planes.
 *
 * Results are written to SwathHeightY/C, DETBufferSizeInKByte,
 * DETBufferSizeY/C, ViewportSizeSupportPerSurface and to the scalar
 * outputs *UnboundedRequestEnabled, *CompressedBufferSizeInkByte,
 * *CompBufReservedSpaceKBytes, *CompBufReservedSpaceNeedAdjustment and
 * *ViewportSizeSupport.
 *
 * NOTE(review): this listing appears to be missing lines. Some parameter
 * declarations (e.g. HRatio, SwathWidth), the declaration of `k`, parts of
 * both callee argument lists, and several brace / `else` / `#endif` lines
 * are not visible.
 */
393 void dml32_CalculateSwathAndDETConfiguration(
394 unsigned int DETSizeOverride[],
395 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
396 unsigned int ConfigReturnBufferSizeInKByte,
397 unsigned int MaxTotalDETInKByte,
398 unsigned int MinCompressedBufferSizeInKByte,
399 double ForceSingleDPP,
400 unsigned int NumberOfActiveSurfaces,
401 unsigned int nomDETInKByte,
402 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
403 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
404 unsigned int PixelChunkSizeKBytes,
405 unsigned int ROBSizeKBytes,
406 unsigned int CompressedBufferSegmentSizeInkByteFinal,
407 enum output_encoder_class Output[],
408 double ReadBandwidthLuma[],
409 double ReadBandwidthChroma[],
410 double MaximumSwathWidthLuma[],
411 double MaximumSwathWidthChroma[],
412 enum dm_rotation_angle SourceRotation[],
413 bool ViewportStationary[],
414 enum source_format_class SourcePixelFormat[],
415 enum dm_swizzle_mode SurfaceTiling[],
416 unsigned int ViewportWidth[],
417 unsigned int ViewportHeight[],
418 unsigned int ViewportXStart[],
419 unsigned int ViewportYStart[],
420 unsigned int ViewportXStartC[],
421 unsigned int ViewportYStartC[],
422 unsigned int SurfaceWidthY[],
423 unsigned int SurfaceWidthC[],
424 unsigned int SurfaceHeightY[],
425 unsigned int SurfaceHeightC[],
426 unsigned int Read256BytesBlockHeightY[],
427 unsigned int Read256BytesBlockHeightC[],
428 unsigned int Read256BytesBlockWidthY[],
429 unsigned int Read256BytesBlockWidthC[],
430 enum odm_combine_mode ODMMode[],
431 unsigned int BlendingAndTiming[],
432 unsigned int BytePerPixY[],
433 unsigned int BytePerPixC[],
434 double BytePerPixDETY[],
435 double BytePerPixDETC[],
436 unsigned int HActive[],
438 double HRatioChroma[],
439 unsigned int DPPPerSurface[],
442 unsigned int swath_width_luma_ub[],
443 unsigned int swath_width_chroma_ub[],
445 double SwathWidthChroma[],
446 unsigned int SwathHeightY[],
447 unsigned int SwathHeightC[],
448 unsigned int DETBufferSizeInKByte[],
449 unsigned int DETBufferSizeY[],
450 unsigned int DETBufferSizeC[],
451 bool *UnboundedRequestEnabled,
452 unsigned int *CompressedBufferSizeInkByte,
453 unsigned int *CompBufReservedSpaceKBytes,
454 bool *CompBufReservedSpaceNeedAdjustment,
455 bool ViewportSizeSupportPerSurface[],
456 bool *ViewportSizeSupport)
// Per-surface scratch arrays, sized for the maximum number of DPPs.
458 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
459 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
460 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
461 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
462 unsigned int RoundedUpSwathSizeBytesY;
463 unsigned int RoundedUpSwathSizeBytesC;
464 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
465 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
467 unsigned int TotalActiveDPP = 0;
468 bool NoChromaSurfaces = true;
469 unsigned int DETBufferSizeInKByteForSwathCalculation;
// NOTE(review): ForceSingleDPP is declared `double` above but is printed
// with %d here -- the conversion specifier does not match the argument type.
471 #ifdef __DML_VBA_DEBUG__
472 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
473 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
474 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
// Step 1: swath widths and maximum swath heights (most of this argument
// list is not visible in this listing).
476 dml32_CalculateSwathWidth(ForceSingleDPP,
477 NumberOfActiveSurfaces,
494 Read256BytesBlockHeightY,
495 Read256BytesBlockHeightC,
496 Read256BytesBlockWidthY,
497 Read256BytesBlockWidthC,
505 SwathWidthdoubleDPPChroma,
511 swath_width_chroma_ub);
// Step 2: maximum swath size in bytes = width_ub * bytes-per-pixel * max
// height. The double product is converted to unsigned int on assignment.
513 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
514 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
515 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
516 #ifdef __DML_VBA_DEBUG__
517 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
518 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
519 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
520 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
521 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
522 RoundedUpMaxSwathSizeBytesY[k]);
523 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
524 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
525 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
526 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
527 RoundedUpMaxSwathSizeBytesC[k]);
// dm_420_10 only: presumably dml_ceil() rounds the size up to a multiple of
// 256 bytes -- confirm against the helper's definition.
530 if (SourcePixelFormat[k] == dm_420_10) {
531 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
532 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
// Step 3: count active DPPs (one per surface when ForceSingleDPP is
// non-zero) and note whether any surface uses a 4:2:0 or rgbe_alpha format.
536 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
537 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
538 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
539 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
540 NoChromaSurfaces = false;
544 // By default, just set the reserved space to 2 pixel chunks size
545 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
547 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
548 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
549 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
// The divisor 512 presumably folds together the terms named above:
// 8 swaths / compression ratio 4 / 1024 bytes per KByte = 1/512.
// Only surface 0's luma swath size is considered, and the comparison is
// done on signed values.
550 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
552 if (*CompBufReservedSpaceNeedAdjustment == 1) {
553 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 #ifdef __DML_VBA_DEBUG__
557 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
558 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
// Step 4: the unbounded-request decision uses only surface 0's Output and
// SurfaceTiling entries.
561 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
// Per-surface DET sizes and the compressed buffer size (part of this
// argument list is not visible in this listing).
563 dml32_CalculateDETBufferSize(DETSizeOverride,
564 UseMALLForPStateChange,
566 NumberOfActiveSurfaces,
567 *UnboundedRequestEnabled,
570 ConfigReturnBufferSizeInKByte,
571 MinCompressedBufferSizeInKByte,
572 CompressedBufferSegmentSizeInkByteFinal,
576 RoundedUpMaxSwathSizeBytesY,
577 RoundedUpMaxSwathSizeBytesC,
581 DETBufferSizeInKByte, // per hubp pipe
582 CompressedBufferSizeInkByte);
584 #ifdef __DML_VBA_DEBUG__
585 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
586 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
587 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
588 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
589 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
590 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
// Step 5: per-surface swath height selection. The overall flag starts true
// and is cleared by any surface that fails the checks below.
593 *ViewportSizeSupport = true;
594 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// Phantom pipes use a fixed 1024 KByte figure for the swath fit tests
// instead of their assigned DET size.
596 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
597 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
598 #ifdef __DML_VBA_DEBUG__
599 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
600 DETBufferSizeInKByteForSwathCalculation);
// Case 1: full-height luma and chroma swaths together fit in half the DET.
603 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
604 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605 SwathHeightY[k] = MaximumSwathHeightY[k];
606 SwathHeightC[k] = MaximumSwathHeightC[k];
607 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
608 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
// Case 2: luma is at least 1.5x chroma and fits once the luma height is halved.
609 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
610 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
611 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
613 SwathHeightC[k] = MaximumSwathHeightC[k];
614 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
615 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
// Case 3: luma is below 1.5x chroma and fits once the chroma height is halved.
616 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
617 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
618 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
619 SwathHeightY[k] = MaximumSwathHeightY[k];
620 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
621 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
622 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
// Case 4 (the `else` line is not visible): halve both heights.
624 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
625 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
626 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
627 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
// The viewport is unsupported when even two half-height swaths exceed half
// the DET, or when a swath width exceeds its maximum (chroma is only
// checked when SwathHeightC is non-zero).
630 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
631 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
632 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
633 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
634 *ViewportSizeSupport = false;
635 ViewportSizeSupportPerSurface[k] = false;
637 ViewportSizeSupportPerSurface[k] = true;
// DET split between planes: everything to luma when there is no chroma
// swath, half/half when luma is at most 1.5x chroma, otherwise about 2/3
// for luma. This uses the surface's real DET size, not the phantom-pipe
// figure above.
640 if (SwathHeightC[k] == 0) {
641 #ifdef __DML_VBA_DEBUG__
642 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
644 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
645 DETBufferSizeC[k] = 0;
646 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
647 #ifdef __DML_VBA_DEBUG__
648 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
650 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
651 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 #ifdef __DML_VBA_DEBUG__
654 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
// The 2/3 share is computed with unsigned integer arithmetic and then
// passed to dml_floor(..., 1024), presumably rounding down to a multiple of
// 1024 bytes; chroma receives the remainder so the two always sum to the
// full DET size.
656 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
657 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 #ifdef __DML_VBA_DEBUG__
661 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
662 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
663 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
664 k, RoundedUpMaxSwathSizeBytesY[k]);
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
666 k, RoundedUpMaxSwathSizeBytesC[k]);
667 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
668 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
669 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
670 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
671 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
672 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
673 ViewportSizeSupportPerSurface[k]);
677 } // CalculateSwathAndDETConfiguration
/*
 * dml32_CalculateSwathWidth() - per-surface swath widths, upper-bound swath
 * widths and maximum swath heights.
 *
 * For each active surface k the function:
 *   - takes the viewport width (or the viewport height for vertical
 *     rotations) as the un-split swath width SwathWidthdoubleDPPY[k];
 *   - derives the per-pipe width SwathWidthY[k] from the ODM combine mode
 *     of the surface named by BlendingAndTiming[k] and from DPPPerSurface[k];
 *   - halves the widths for chroma on the 4:2:0 formats;
 *   - computes swath_width_luma_ub / swath_width_chroma_ub in terms of the
 *     256-byte read block size, capped by the surface dimension;
 *   - sets MaximumSwathHeightY/C to the read block height (non-vertical) or
 *     the read block width (vertical).
 *
 * NOTE(review): this listing appears to be missing lines. The ForceSingleDPP
 * and HRatio parameter declarations, the declarations of `k` and `j`, one
 * operand inside the first dml_floor() call, and several brace / `else` /
 * `#endif` lines are not visible.
 */
679 void dml32_CalculateSwathWidth(
681 unsigned int NumberOfActiveSurfaces,
682 enum source_format_class SourcePixelFormat[],
683 enum dm_rotation_angle SourceRotation[],
684 bool ViewportStationary[],
685 unsigned int ViewportWidth[],
686 unsigned int ViewportHeight[],
687 unsigned int ViewportXStart[],
688 unsigned int ViewportYStart[],
689 unsigned int ViewportXStartC[],
690 unsigned int ViewportYStartC[],
691 unsigned int SurfaceWidthY[],
692 unsigned int SurfaceWidthC[],
693 unsigned int SurfaceHeightY[],
694 unsigned int SurfaceHeightC[],
695 enum odm_combine_mode ODMMode[],
696 unsigned int BytePerPixY[],
697 unsigned int BytePerPixC[],
698 unsigned int Read256BytesBlockHeightY[],
699 unsigned int Read256BytesBlockHeightC[],
700 unsigned int Read256BytesBlockWidthY[],
701 unsigned int Read256BytesBlockWidthC[],
702 unsigned int BlendingAndTiming[],
703 unsigned int HActive[],
705 unsigned int DPPPerSurface[],
708 double SwathWidthdoubleDPPY[],
709 double SwathWidthdoubleDPPC[],
710 double SwathWidthY[], // per-pipe
711 double SwathWidthC[], // per-pipe
712 unsigned int MaximumSwathHeightY[],
713 unsigned int MaximumSwathHeightC[],
714 unsigned int swath_width_luma_ub[], // per-pipe
715 unsigned int swath_width_chroma_ub[]) // per-pipe
718 enum odm_combine_mode MainSurfaceODMMode;
720 unsigned int surface_width_ub_l;
721 unsigned int surface_height_ub_l;
722 unsigned int surface_width_ub_c;
723 unsigned int surface_height_ub_c;
725 #ifdef __DML_VBA_DEBUG__
726 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
727 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
// The swath runs along the viewport width unless the surface is rotated
// to a vertical orientation, in which case it runs along the height.
731 if (!IsVertical(SourceRotation[k]))
732 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
734 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
736 #ifdef __DML_VBA_DEBUG__
737 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
738 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Use the ODM mode of the surface that BlendingAndTiming[k] points at,
// falling back to this surface's own mode when no index matches.
741 MainSurfaceODMMode = ODMMode[k];
742 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
743 if (BlendingAndTiming[k] == j)
744 MainSurfaceODMMode = ODMMode[j];
// Per-pipe luma width: unchanged when a single DPP is forced; otherwise
// limited to a quarter or half of the scaled active width for ODM 4:1 / 2:1,
// halved when two DPPs serve the surface, or unchanged in the remaining case.
747 if (ForceSingleDPP) {
748 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
750 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
751 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
752 dml_round(HActive[k] / 4.0 * HRatio[k]));
753 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
754 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
755 dml_round(HActive[k] / 2.0 * HRatio[k]));
756 } else if (DPPPerSurface[k] == 2) {
757 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
// NOTE(review): SwathWidthdoubleDPPY[] and SwathWidthY[] are `double`
// arrays but are printed with %d in the last two calls below -- the
// conversion specifier does not match the argument type.
763 #ifdef __DML_VBA_DEBUG__
764 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
765 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
766 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
767 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
768 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
// Chroma widths are half the luma widths for the 4:2:0 formats and equal
// to the luma widths otherwise.
771 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
772 SourcePixelFormat[k] == dm_420_12) {
773 SwathWidthC[k] = SwathWidthY[k] / 2;
774 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
776 SwathWidthC[k] = SwathWidthY[k];
777 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
// With a forced single DPP both per-pipe widths are reset to the un-split
// widths. NOTE(review): the caller shown in this file passes a `double`
// here, so this compares for equality with `true` (1) while the earlier
// test only checks for non-zero -- confirm the parameter's declared type.
780 if (ForceSingleDPP == true) {
781 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
782 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
// Surface dimensions expressed in whole read blocks; presumably dml_ceil()
// rounds up to a multiple of its second argument -- confirm.
785 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
786 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
787 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
788 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
790 #ifdef __DML_VBA_DEBUG__
791 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
792 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
793 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
794 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
795 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
796 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
797 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
798 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
799 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
800 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
// Non-vertical scan: swaths are one read block tall and widths are measured
// in read block widths along X. A stationary viewport on a single DPP uses
// the exact block span from the viewport start; otherwise a full extra
// block is added to the rounded width. Both forms are capped by the
// surface dimension.
803 if (!IsVertical(SourceRotation[k])) {
804 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
805 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
806 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
807 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
808 dml_floor(ViewportXStart[k] +
810 Read256BytesBlockWidthY[k] - 1,
811 Read256BytesBlockWidthY[k]) -
812 dml_floor(ViewportXStart[k],
813 Read256BytesBlockWidthY[k]));
815 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
816 dml_ceil(SwathWidthY[k] - 1,
817 Read256BytesBlockWidthY[k]) +
818 Read256BytesBlockWidthY[k]);
// Chroma follows the same scheme only when the format has chroma bytes;
// otherwise its upper-bound width is 0.
820 if (BytePerPixC[k] > 0) {
821 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
822 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
824 Read256BytesBlockWidthC[k] - 1,
825 Read256BytesBlockWidthC[k]) -
826 dml_floor(ViewportXStartC[k],
827 Read256BytesBlockWidthC[k]));
829 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
830 dml_ceil(SwathWidthC[k] - 1,
831 Read256BytesBlockWidthC[k]) +
832 Read256BytesBlockWidthC[k]);
835 swath_width_chroma_ub[k] = 0;
// Vertical scan (the `else` line is not visible): the roles of block width
// and height swap. Swaths are one read block *width* tall and widths are
// measured in read block heights along Y.
838 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
839 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
841 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
842 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
843 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
844 Read256BytesBlockHeightY[k]) -
845 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
847 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
848 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
850 if (BytePerPixC[k] > 0) {
851 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
852 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
854 Read256BytesBlockHeightC[k] - 1,
855 Read256BytesBlockHeightC[k]) -
856 dml_floor(ViewportYStartC[k],
857 Read256BytesBlockHeightC[k]));
859 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
860 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
861 Read256BytesBlockHeightC[k]);
864 swath_width_chroma_ub[k] = 0;
868 #ifdef __DML_VBA_DEBUG__
869 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
870 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
872 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
876 } // CalculateSwathWidth
/*
 * dml32_UnboundedRequest() - decide whether unbounded requesting is used.
 *
 * The starting value of ret_val is true only when the policy is not
 * dm_unbounded_requesting_disable, exactly one DPP is active and NoChroma
 * is set. Three further conditions are then tested: an eDP-only policy with
 * a non-eDP output, linear surface tiling, and a needed reserved-space
 * adjustment combined with the "disable if adjustment needed" flag.
 *
 * NOTE(review): this listing appears to be missing lines. The NoChroma
 * parameter declaration, the statement under each of the three `if` tests
 * and the return statement are not visible; presumably each test clears
 * ret_val and ret_val is returned -- confirm against the complete file.
 */
878 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
879 unsigned int TotalNumberOfActiveDPP,
881 enum output_encoder_class Output,
882 enum dm_swizzle_mode SurfaceTiling,
883 bool CompBufReservedSpaceNeedAdjustment,
884 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
// This initial value is overwritten unconditionally by the next statement.
886 bool ret_val = false;
888 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
889 TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy but the output is not eDP.
890 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
// Linear (untiled) surface.
893 if (SurfaceTiling == dm_sw_linear)
// Reserved space would need adjusting and the caller asked to disable
// unbounded requesting in that situation.
896 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
899 #ifdef __DML_VBA_DEBUG__
900 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
902 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
/*
 * dml32_CalculateDETBufferSize() - size the per-surface DET buffers and the
 * compressed buffer.
 *
 * Per-surface inputs are arrays indexed by surface (loops below run over
 * 0..NumberOfActiveSurfaces-1). Results are written to DETBufferSizeInKByte[]
 * and *CompressedBufferSizeInkByte.
 *
 * Two paths are visible:
 *  - UnboundedRequestEnabled: only index 0 is sized, either from
 *    DETSizeOverride[0] or as the larger of nomDETInKByte and twice the
 *    luma+chroma swath size in KB passed through dml_ceil(..., 64.0)
 *    (presumably rounding up to a multiple of 64 -- confirm). The compressed
 *    buffer then gets ConfigReturnBufferSizeInKByte minus that DET size.
 *  - otherwise: each surface first receives a minimum allocation charged to a
 *    pool that starts at MaxTotalDETInKByte; the remaining pool is then handed
 *    out in proportion to read bandwidth, lowest-bandwidth surface first, and
 *    the compressed buffer is set to MinCompressedBufferSizeInKByte.
 * The last visible assignment scales *CompressedBufferSizeInkByte by
 * CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * NOTE(review): a number of lines of this function are not visible in this
 * listing (braces, else branches, the declarations of ForceSingleDPP, k, j and
 * minDET, and the initial values of minDET, minDET_pipe and TotalBandwidth).
 * Verify the description above against the complete file.
 */
908 void dml32_CalculateDETBufferSize(
909 unsigned int DETSizeOverride[],
910 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
912 unsigned int NumberOfActiveSurfaces,
913 bool UnboundedRequestEnabled,
914 unsigned int nomDETInKByte,
915 unsigned int MaxTotalDETInKByte,
916 unsigned int ConfigReturnBufferSizeInKByte,
917 unsigned int MinCompressedBufferSizeInKByte,
918 unsigned int CompressedBufferSegmentSizeInkByteFinal,
919 enum source_format_class SourcePixelFormat[],
920 double ReadBandwidthLuma[],
921 double ReadBandwidthChroma[],
922 unsigned int RoundedUpMaxSwathSizeBytesY[],
923 unsigned int RoundedUpMaxSwathSizeBytesC[],
924 unsigned int DPPPerSurface[],
926 unsigned int DETBufferSizeInKByte[],
927 unsigned int *CompressedBufferSizeInkByte)
929 unsigned int DETBufferSizePoolInKByte;
930 unsigned int NextDETBufferPieceInKByte;
931 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
932 bool NextPotentialSurfaceToAssignDETPieceFound;
933 unsigned int NextSurfaceToAssignDETPiece;
934 double TotalBandwidth;
935 double BandwidthOfSurfacesNotAssignedDETPiece;
936 unsigned int max_minDET;
938 unsigned int minDET_pipe;
941 #ifdef __DML_VBA_DEBUG__
942 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
943 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
944 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
945 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
946 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
947 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
948 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
949 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
950 CompressedBufferSegmentSizeInkByteFinal);
953 // Note: Will use default det size if that fits 2 swaths
// Unbounded-request path: only surface 0 is sized here.
954 if (UnboundedRequestEnabled) {
955 if (DETSizeOverride[0] > 0) {
956 DETBufferSizeInKByte[0] = DETSizeOverride[0];
// No override: nominal size, or two swaths (Y + C, bytes -> KB) if that is larger.
958 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
959 ((double) RoundedUpMaxSwathSizeBytesY[0] +
960 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
// Whatever the DET does not use out of the return buffer goes to the compressed buffer.
962 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
// Pool-based path: start with the full DET budget and charge each surface against it.
964 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
965 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
966 DETBufferSizeInKByte[k] = nomDETInKByte;
// 4:2:0 formats use a search limit 64 below nominal; the other assignment uses nominal.
967 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
968 SourcePixelFormat[k] == dm_420_12) {
969 max_minDET = nomDETInKByte - 64;
971 max_minDET = nomDETInKByte;
976 // add DET resource until can hold 2 full swaths
// Step minDET up by 64 until it covers two swaths (Y + C); the first fit is kept in minDET_pipe.
977 while (minDET <= max_minDET && minDET_pipe == 0) {
978 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
979 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
980 minDET_pipe = minDET;
981 minDET = minDET + 64;
984 #ifdef __DML_VBA_DEBUG__
985 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
986 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
987 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
988 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
989 RoundedUpMaxSwathSizeBytesY[k]);
990 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
991 RoundedUpMaxSwathSizeBytesC[k]);
// Nothing up to max_minDET held two swaths: fall back to one swath's worth (Y + C), at least 128.
994 if (minDET_pipe == 0) {
995 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
996 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
997 #ifdef __DML_VBA_DEBUG__
998 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
999 __func__, k, minDET_pipe);
// Initial grant: phantom-pipe surfaces get 0; an override is honoured and charged
// to the pool; otherwise minDET_pipe is granted when the pool can cover it for
// every DPP of the surface (one DPP when ForceSingleDPP is set).
1003 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1004 DETBufferSizeInKByte[k] = 0;
1005 } else if (DETSizeOverride[k] > 0) {
1006 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1007 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1008 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1009 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1010 DETBufferSizeInKByte[k] = minDET_pipe;
1011 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1012 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1015 #ifdef __DML_VBA_DEBUG__
1016 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1017 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1018 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1019 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
// Sum luma + chroma read bandwidth over all surfaces that are not phantom pipes.
// NOTE(review): the initialisation of TotalBandwidth is not visible here -- confirm it is zeroed first.
1024 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1025 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1026 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1028 #ifdef __DML_VBA_DEBUG__
1029 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
// This debug loop declares its own k, shadowing the function-level index.
1030 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1031 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1032 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1033 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
// Mark the surfaces that need no further DET: phantom pipes, overridden surfaces,
// and surfaces whose share of MaxTotalDETInKByte already meets or exceeds their
// share of TotalBandwidth. The latter two are also removed from the bandwidth
// still to be served.
// NOTE(review): the share test divides by TotalBandwidth; no guard against a zero total is visible.
1035 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1036 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1038 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1039 DETPieceAssignedToThisSurfaceAlready[k] = true;
1040 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1041 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1042 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1043 DETPieceAssignedToThisSurfaceAlready[k] = true;
1044 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1045 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1047 DETPieceAssignedToThisSurfaceAlready[k] = false;
1049 #ifdef __DML_VBA_DEBUG__
1050 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1051 DETPieceAssignedToThisSurfaceAlready[k]);
1052 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1053 BandwidthOfSurfacesNotAssignedDETPiece);
// Distribution passes: each j iteration picks at most one still-unserved surface.
1057 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1058 NextPotentialSurfaceToAssignDETPieceFound = false;
1059 NextSurfaceToAssignDETPiece = 0;
1061 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1062 #ifdef __DML_VBA_DEBUG__
1063 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1064 ReadBandwidthLuma[k]);
1065 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1066 ReadBandwidthChroma[k]);
1067 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1068 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1069 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1070 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1071 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1072 NextSurfaceToAssignDETPiece);
// Candidate selection: the first unserved surface, then any unserved surface with
// strictly lower luma + chroma bandwidth than the current candidate.
1074 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1075 (!NextPotentialSurfaceToAssignDETPieceFound ||
1076 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1077 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1078 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1079 NextSurfaceToAssignDETPiece = k;
1080 NextPotentialSurfaceToAssignDETPieceFound = true;
1082 #ifdef __DML_VBA_DEBUG__
1083 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1084 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1085 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1086 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1090 if (NextPotentialSurfaceToAssignDETPieceFound) {
1091 // Note: To show the banker's rounding behavior in VBA and also the fact
1092 // that the DET buffer size varies due to precision issue
1094 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1095 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1096 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1097 // BandwidthOfSurfacesNotAssignedDETPiece /
1098 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1099 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1100 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1101 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1102 //BandwidthOfSurfacesNotAssignedDETPiece /
1103 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1105 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1106 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
// Piece size = pool * (surface bandwidth / unserved bandwidth), rounded by
// dml_round in units of (DPP count * 64), and capped by the pool passed through
// dml_floor with that same unit.
1108 NextDETBufferPieceInKByte = dml_min(
1109 dml_round((double) DETBufferSizePoolInKByte *
1110 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1111 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1112 BandwidthOfSurfacesNotAssignedDETPiece /
1113 ((ForceSingleDPP ? 1 :
1114 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1115 (ForceSingleDPP ? 1 :
1116 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1117 dml_floor((double) DETBufferSizePoolInKByte,
1118 (ForceSingleDPP ? 1 :
1119 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1121 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1122 // We should limit the per-pipe DET size to the nominal / max per pipe.
// NOTE(review): the three DPPPerSurface[k] uses below index with k, not with
// NextSurfaceToAssignDETPiece as every other expression in this branch does. If
// this code runs after the k search loop has finished (its closing brace is not
// visible here), k equals NumberOfActiveSurfaces and does not identify the
// selected surface. Likely should be DPPPerSurface[NextSurfaceToAssignDETPiece] -- confirm.
1123 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1124 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1125 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1126 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1127 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1129 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1130 // already has the max per-pipe value
1131 NextDETBufferPieceInKByte = 0;
1135 #ifdef __DML_VBA_DEBUG__
1136 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1137 DETBufferSizePoolInKByte);
1138 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1139 NextSurfaceToAssignDETPiece);
1140 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1141 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1142 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1143 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1144 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1145 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1146 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1147 NextDETBufferPieceInKByte);
1148 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1149 __func__, j, NextSurfaceToAssignDETPiece,
1150 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
// The piece covers all DPPs of the surface, so the per-DPP size grows by piece / DPP count.
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1154 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1155 + NextDETBufferPieceInKByte
1156 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1157 #ifdef __DML_VBA_DEBUG__
1158 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
// Charge the pool, mark the surface served, and drop its bandwidth from the unserved total.
1161 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1162 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1163 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1164 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1165 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1168 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
// Scale the compressed buffer size by segment size / 64 (unsigned integer arithmetic).
// NOTE(review): presumably applied on both paths; the enclosing braces are not visible -- confirm.
1170 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1172 #ifdef __DML_VBA_DEBUG__
1173 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1174 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1175 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1176 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1177 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1180 } // CalculateDETBufferSize
/*
 * dml32_CalculateODMMode() - choose the ODM combine mode for one surface.
 *
 * Computes the DISPCLK required with ODM disabled, 2:1 and 4:1 through
 * dml32_CalculateRequiredDispclk(), then writes:
 *   *ODMMode                    - selected combine mode,
 *   *RequiredDISPCLKPerSurface  - the DISPCLK matching that mode,
 *   *TotalAvailablePipesSupport - starts true; set to false in the branches
 *                                 that check the DPP budget.
 * The visible branches test the DPP budget as
 * TotalNumberOfActiveDPP + {4, 2, 1} <= MaxNumDPP for 4:1, 2:1 and the
 * remaining case respectively.
 *
 * NOTE(review): several parameters used below (DSCEnable, PixelClock and the
 * trailing arguments of the dml32_CalculateRequiredDispclk() calls), the
 * remaining output assignments and all else/brace lines are not visible in
 * this listing -- confirm against the full file.
 */
1182 void dml32_CalculateODMMode(
1183 unsigned int MaximumPixelsPerLinePerDSCUnit,
1184 unsigned int HActive,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
// Candidate DISPCLK for each of the three ODM modes (last argument line of each call not visible).
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
// Defaults: pipes are sufficient and ODM is off until a branch below decides otherwise.
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
// 4:1 candidate: Output is none of dm_hdmi / dm_dp / dm_edp, and either the policy
// forces 4:1, or 2:1 would need more than StateDispclk, or DSC is on with HActive
// above twice the per-DSC-unit line limit, or more than 8 DSC slices are used.
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
// 4:1 needs four more DPPs within the budget.
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1239 *TotalAvailablePipesSupport = false;
// 2:1 candidate: Output is not dm_hdmi, and either the policy forces 2:1, or only
// 2:1 fits within StateDispclk, or DSC is on with HActive above the per-DSC-unit
// line limit, or 5..8 DSC slices are used.
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
// 2:1 needs two more DPPs within the budget.
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1251 *TotalAvailablePipesSupport = false;
// Remaining case: a single additional DPP must fit within the budget.
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1257 *TotalAvailablePipesSupport = false;
/*
 * dml32_CalculateRequiredDispclk() - DISPCLK needed for a given ODM mode.
 *
 * The pixel clock is divided by 4 (4:1), by 2 (2:1) or left unchanged, then
 * two candidates are derived through dml32_RoundToDFSGranularity() with its
 * second argument set to 1:
 *   - with ramping:    clock * (1 + downspread%) * (1 + ramping margin%)
 *   - without ramping: clock * (1 + downspread%)
 * MaxDispclk is passed through the same helper with the second argument 0.
 *
 * Return value:
 *   - the no-ramping candidate when even that exceeds the rounded maximum,
 *   - the rounded maximum when only the ramping candidate exceeds it,
 *   - the ramping candidate otherwise.
 *
 * NOTE(review): the PixelClock and MaxDispclk declarations are not visible in
 * the parameter list shown here -- confirm they are parameters.
 */
1261 double dml32_CalculateRequiredDispclk(
1262 enum odm_combine_mode ODMMode,
1264 double DISPCLKDPPCLKDSCCLKDownSpreading,
1265 double DISPCLKRampingMargin,
1266 double DISPCLKDPPCLKVCOSpeed,
1269 double RequiredDispclk = 0.;
1270 double PixelClockAfterODM;
1271 double DISPCLKWithRampingRoundedToDFSGranularity;
1272 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1273 double MaxDispclkRoundedDownToDFSGranularity;
// ODM combine divides the pixel clock by the combine factor.
1275 if (ODMMode == dm_odm_combine_mode_4to1)
1276 PixelClockAfterODM = PixelClock / 4;
1277 else if (ODMMode == dm_odm_combine_mode_2to1)
1278 PixelClockAfterODM = PixelClock / 2;
1280 PixelClockAfterODM = PixelClock;
// Candidate including both the down-spread and the ramping margin (percentages).
1283 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1284 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1285 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
// Candidate including the down-spread only.
1287 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1288 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
// The ceiling is rounded with round_up = 0.
1290 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
// Prefer the ramping candidate, clamp it to the maximum, and only report a value
// above the maximum when the no-ramping candidate itself does not fit.
1292 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1293 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1294 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1295 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1297 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1299 return RequiredDispclk;
/*
 * dml32_RoundToDFSGranularity() - snap Clock to a value of the form
 * VCOSpeed * 4.0 / divider.
 *
 * Two results are visible; both derive the divider from VCOSpeed * 4.0 / Clock:
 *   - with dml_floor(..., 1.0) the divider is not larger than the exact ratio,
 *     so the result is not below Clock;
 *   - with dml_ceil(..., 1.0) the divider is not smaller than the exact ratio,
 *     so the result is not above Clock.
 * (This assumes dml_floor/dml_ceil with granularity 1.0 round to whole
 * numbers -- confirm against their definitions.)
 *
 * NOTE(review): the condition choosing between the two returns is not visible
 * here; presumably round_up selects the dml_floor variant -- confirm.
 * NOTE(review): no guard is visible for a divider of 0 (Clock larger than
 * VCOSpeed * 4.0 in the dml_floor variant) or for Clock == 0.
 */
1302 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
1308 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0);
1310 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
/*
 * dml32_CalculateOutputLink() - determine output bpp, DSC/FEC requirement and
 * the link type/rate for one output.
 *
 * The outputs are first reset: *RequiresDSC and *RequiresFEC to false,
 * *OutputType / *OutputRate to their "unknown" values. The rest runs only when
 * IsMainSurfaceUsingTheIndicatedTiming is set.
 *  - dm_hdmi: no DSC or FEC; *OutBpp comes from dml32_TruncToValidBPP() called
 *    with min(600, PHYCLKPerState) * 10 as link bit rate and 3 lanes.
 *  - dm_dp / dm_dp2p0 / dm_edp: DSCEnable seeds *RequiresDSC, LinkDSCEnable and
 *    *RequiresFEC, then link rates are tried in increasing order: UHBR10,
 *    UHBR13.5, UHBR20 for dm_dp2p0, and HBR, HBR2, HBR3 in the second chain.
 *    A rate is tried only when OutputLinkDPRate is dm_dp_rate_na or names that
 *    rate and the PHY clock is high enough; rates after the first are tried
 *    only while *OutBpp is still 0. Within a rate, when no bpp was found,
 *    DSCEnable is set and ForcedOutputLinkBPP is 0, the bpp is recomputed with
 *    LinkDSCEnable turned on. For all but the highest rate of each chain that
 *    retry also requires the PHY clock to be below the next rate's threshold.
 *
 * NOTE(review): DSCEnable, OutBpp, RequiresDSC, LinkDSCEnable and dummy are
 * used below but their declarations are not visible in this listing, and the
 * else/brace lines are missing too -- confirm the branch structure against
 * the full file.
 * NOTE(review): RequiresFEC is declared as double * yet only ever stores
 * true/false -- confirm whether bool * was intended (callers depend on it).
 */
1313 void dml32_CalculateOutputLink(
1314 double PHYCLKPerState,
1315 double PHYCLKD18PerState,
1316 double PHYCLKD32PerState,
1317 double Downspreading,
1318 bool IsMainSurfaceUsingTheIndicatedTiming,
1319 enum output_encoder_class Output,
1320 enum output_format_class OutputFormat,
1321 unsigned int HTotal,
1322 unsigned int HActive,
1323 double PixelClockBackEnd,
1324 double ForcedOutputLinkBPP,
1325 unsigned int DSCInputBitPerComponent,
1326 unsigned int NumberOfDSCSlices,
1327 double AudioSampleRate,
1328 unsigned int AudioSampleLayout,
1329 enum odm_combine_mode ODMModeNoDSC,
1330 enum odm_combine_mode ODMModeDSC,
1332 unsigned int OutputLinkDPLanes,
1333 enum dm_output_link_dp_rate OutputLinkDPRate,
1337 double *RequiresFEC,
1339 enum dm_output_type *OutputType,
1340 enum dm_output_rate *OutputRate,
1341 unsigned int *RequiredSlots)
// Reset the outputs before any link evaluation.
1345 *RequiresDSC = false;
1346 *RequiresFEC = false;
1348 *OutputType = dm_output_type_unknown;
1349 *OutputRate = dm_output_rate_unknown;
1351 if (IsMainSurfaceUsingTheIndicatedTiming) {
// HDMI: DSC is passed as false, the lane count is fixed at 3, and the slot result goes to dummy.
1352 if (Output == dm_hdmi) {
1353 *RequiresDSC = false;
1354 *RequiresFEC = false;
1355 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1356 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1357 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1358 ODMModeNoDSC, ODMModeDSC, &dummy);
1359 //OutputTypeAndRate = "HDMI";
1360 *OutputType = dm_output_type_hdmi;
1362 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
// Seed DSC/FEC from DSCEnable. With DSC, FEC is requested for dm_dp and dm_dp2p0;
// the assignments after that belong to else branches that are not visible here.
1363 if (DSCEnable == true) {
1364 *RequiresDSC = true;
1365 LinkDSCEnable = true;
1366 if (Output == dm_dp || Output == dm_dp2p0)
1367 *RequiresFEC = true;
1369 *RequiresFEC = false;
1371 *RequiresDSC = false;
1372 LinkDSCEnable = false;
1373 if (Output == dm_dp2p0)
1374 *RequiresFEC = true;
1376 *RequiresFEC = false;
1378 if (Output == dm_dp2p0) {
// NOTE(review): 10000 / 32, 13500 / 32 and 20000 / 32 are integer divisions
// (312, 421, 625), so the thresholds compared with the double PHYCLKD32PerState
// lose their fractional part (312.5, 421.875) -- confirm whether that is intended.
// UHBR10: link bit rate 10000 reduced by the down-spread percentage.
1380 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1381 PHYCLKD32PerState >= 10000 / 32) {
1382 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1383 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1384 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1385 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1386 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// No valid bpp and UHBR13.5 is out of reach for the PHY clock: retry with DSC on.
1387 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1388 ForcedOutputLinkBPP == 0) {
1389 *RequiresDSC = true;
1390 LinkDSCEnable = true;
1391 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1392 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1393 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1394 OutputFormat, DSCInputBitPerComponent,
1395 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1396 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1398 //OutputTypeAndRate = Output & " UHBR10";
1399 *OutputType = dm_output_type_dp2p0;
1400 *OutputRate = dm_output_rate_dp_rate_uhbr10;
// UHBR13.5: only when the previous rate produced no bpp.
1402 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1403 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1404 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1407 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1408 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC when UHBR20 is out of reach for the PHY clock.
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1421 //OutputTypeAndRate = Output & " UHBR13p5";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
// UHBR20: highest rate of this chain, so the DSC retry has no PHY-clock upper bound.
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1433 *RequiresDSC = true;
1434 LinkDSCEnable = true;
1435 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1436 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1437 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1438 OutputFormat, DSCInputBitPerComponent,
1439 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1440 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1442 //OutputTypeAndRate = Output & " UHBR20";
1443 *OutputType = dm_output_type_dp2p0;
1444 *OutputRate = dm_output_rate_dp_rate_uhbr20;
// Second chain (presumably the else of the dm_dp2p0 test, i.e. dm_dp / dm_edp):
// HBR needs PHYCLKPerState >= 270 and uses link bit rate 2700 minus down-spread.
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1449 PHYCLKPerState >= 270) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC when HBR2 is out of reach; FEC is requested for dm_dp only.
1455 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1456 ForcedOutputLinkBPP == 0) {
1457 *RequiresDSC = true;
1458 LinkDSCEnable = true;
1459 if (Output == dm_dp)
1460 *RequiresFEC = true;
1461 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1462 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1463 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1464 OutputFormat, DSCInputBitPerComponent,
1465 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1466 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1468 //OutputTypeAndRate = Output & " HBR";
1469 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1470 *OutputRate = dm_output_rate_dp_rate_hbr;
// HBR2: PHYCLKPerState >= 540, link bit rate 5400; only when no bpp was found so far.
1472 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1473 *OutBpp == 0 && PHYCLKPerState >= 540) {
1474 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1475 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1476 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1477 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1478 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
// Retry with DSC when HBR3 is out of reach; FEC is requested for dm_dp only.
1480 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1481 ForcedOutputLinkBPP == 0) {
1482 *RequiresDSC = true;
1483 LinkDSCEnable = true;
1484 if (Output == dm_dp)
1485 *RequiresFEC = true;
1487 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1488 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1489 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1490 OutputFormat, DSCInputBitPerComponent,
1491 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1492 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1494 //OutputTypeAndRate = Output & " HBR2";
1495 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1496 *OutputRate = dm_output_rate_dp_rate_hbr2;
// HBR3: PHYCLKPerState >= 810, link bit rate 8100; highest rate of this chain.
// The final argument line of the first call below is not visible in this listing.
1498 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1499 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1500 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1501 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1502 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1503 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1506 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1507 *RequiresDSC = true;
1508 LinkDSCEnable = true;
1509 if (Output == dm_dp)
1510 *RequiresFEC = true;
1512 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1513 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1514 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1515 OutputFormat, DSCInputBitPerComponent,
1516 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1517 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1519 //OutputTypeAndRate = Output & " HBR3";
1520 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1521 *OutputRate = dm_output_rate_dp_rate_hbr3;
/*
 * dml32_CalculateDPPCLK() - derive the global DPPCLK and per-surface DPPCLKs.
 *
 * First pass: Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k], raised
 * by the down-spread percentage; *GlobalDPPCLK tracks the maximum.
 * *GlobalDPPCLK is then passed through dml32_RoundToDFSGranularity() with
 * round_up = 1.
 * Second pass: each Dppclk[k] is expressed as a multiple of *GlobalDPPCLK / 255,
 * with the multiplier taken from dml_ceil(..., 1.0) (presumably rounding up to
 * a whole number of 1/255 steps -- confirm against dml_ceil).
 *
 * NOTE(review): the Dppclk and k declarations and the initial value of
 * *GlobalDPPCLK (which dml_max reads on the first iteration) are not visible
 * in this listing -- confirm it is zeroed before the first loop.
 * NOTE(review): no guard is visible for DPPPerSurface[k] == 0 or for a zero
 * *GlobalDPPCLK in the second pass.
 */
1528 void dml32_CalculateDPPCLK(
1529 unsigned int NumberOfActiveSurfaces,
1530 double DISPCLKDPPCLKDSCCLKDownSpreading,
1531 double DISPCLKDPPCLKVCOSpeed,
1532 double DPPCLKUsingSingleDPP[],
1533 unsigned int DPPPerSurface[],
1536 double *GlobalDPPCLK,
// Per-surface requirement with down-spread, and the running maximum.
1541 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1542 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1543 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
// Snap the global clock to DFS granularity.
1545 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
// Quantise each surface clock to 1/255 steps of the global clock.
1546 for (k = 0; k < NumberOfActiveSurfaces; ++k)
1547 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
/*
 * dml32_TruncToValidBPP() - reduce the bpp a link can carry to a valid output
 * bpp, or validate a requested bpp.
 *
 * Visible steps:
 *  1. Per-format limits. Only the MaxDSCBPP assignments are visible:
 *     1.5x, 2x or 3x DSCInputBitPerComponent minus 1/16.
 *  2. MaxLinkBPP from LinkBitRate, Lanes and PixelClock:
 *       dm_dp2p0:           rate * lanes / clock * 128/132 * 383/384 * 65536/65540
 *       DSC on dm_dp:       rate / 10 * 8 * lanes / clock * (1 - 2.4/100)
 *       remaining case:     rate / 10 * 8 * lanes / clock
 *  3. ODM adjustment, once keyed on ODMModeDSC and once on ODMModeNoDSC:
 *     4:1 caps MaxLinkBPP at 16, 2:1 caps it at 32, split 1:2 doubles it.
 *  4. DesiredBPP == 0: the result is chosen from comparisons of MaxLinkBPP
 *     with MinDSCBPP / MaxDSCBPP or with the NonDSCBPPn values; the one visible
 *     return truncates MaxLinkBPP to 1/16 steps via dml_floor(16 * x, 1) / 16.
 *  5. Otherwise DesiredBPP is checked: without DSC it must equal one of the
 *     NonDSCBPPn values, with DSC it must lie in [MinDSCBPP, MaxDSCBPP].
 *  *RequiredSlots is set to dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1).
 *
 * NOTE(review): many lines are not visible in this listing -- the LinkBitRate,
 * Lanes, PixelClock, DesiredBPP and DSCEnable parameters, the MaxDSCBPP and
 * MaxLinkBPP declarations, every NonDSCBPPn / MinDSCBPP assignment, most
 * return statements, and the conditions selecting between the two ODM chains.
 * Confirm the description against the full file.
 */
1550 double dml32_TruncToValidBPP(
1553 unsigned int HTotal,
1554 unsigned int HActive,
1558 enum output_encoder_class Output,
1559 enum output_format_class Format,
1560 unsigned int DSCInputBitPerComponent,
1561 unsigned int DSCSlices,
1562 unsigned int AudioRate,
1563 unsigned int AudioLayout,
1564 enum odm_combine_mode ODMModeNoDSC,
1565 enum odm_combine_mode ODMModeDSC,
1567 unsigned int *RequiredSlots)
1570 unsigned int MinDSCBPP;
1572 unsigned int NonDSCBPP0;
1573 unsigned int NonDSCBPP1;
1574 unsigned int NonDSCBPP2;
1575 unsigned int NonDSCBPP3;
1577 if (Format == dm_420) {
// NOTE(review): "1 / 16" is integer division and evaluates to 0, so nothing is
// subtracted here, unlike the "1.0 / 16" used by the other formats below.
// Likely meant to be 1.0 / 16 -- confirm.
1582 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1583 } else if (Format == dm_444) {
1589 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1591 if (Output == dm_hdmi) {
1600 if (Format == dm_n422) {
1602 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1605 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link capacity in bits per pixel; the scaling factors depend on output type and DSC.
1608 if (Output == dm_dp2p0) {
1609 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1610 } else if (DSCEnable && Output == dm_dp) {
1611 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1613 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM adjustment keyed on the DSC ODM mode.
1617 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1618 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1619 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1620 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1621 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1622 MaxLinkBPP = 2 * MaxLinkBPP;
// Same adjustment keyed on the non-DSC ODM mode (the selecting condition is not visible).
1624 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1625 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1626 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1627 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1628 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1629 MaxLinkBPP = 2 * MaxLinkBPP;
// No requested bpp: derive the result from what the link can carry.
1632 if (DesiredBPP == 0) {
1634 if (MaxLinkBPP < MinDSCBPP)
1636 else if (MaxLinkBPP >= MaxDSCBPP)
// Between the DSC limits: truncate to 1/16 bpp steps.
1639 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
// Non-DSC candidates are compared from NonDSCBPP3 down to NonDSCBPP0.
1641 if (MaxLinkBPP >= NonDSCBPP3)
1643 else if (MaxLinkBPP >= NonDSCBPP2)
1645 else if (MaxLinkBPP >= NonDSCBPP1)
1647 else if (MaxLinkBPP >= NonDSCBPP0)
// Requested bpp: the condition below is true when DesiredBPP is not acceptable.
1653 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1654 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1655 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
// Fraction of the link used by DesiredBPP, expressed in 64ths.
1661 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1664 } // TruncToValidBPP
/*
 * dml32_RequiredDTBCLK() - DTBCLK required for one output.
 *
 * Without DSC the result is max(PixelClock / 4 * OutputBpp / 24, 25).
 * With DSC:
 *   PixelWordRate      = PixelClock (dm_444) or PixelClock / 2 (other formats)
 *   HCActive           = derived from OutputBpp, DSCSlices and HActive with
 *                        nested dml_ceil calls (divisions by 8 and by 3)
 *   AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal
 *   HActiveTribyteRate = PixelWordRate * HCActive / HActive
 * and the result is the largest of PixelWordRate / 4, the two tribyte rates
 * / 4 and 25, multiplied by 1.002.
 *
 * NOTE(review): the DSCEnable, PixelClock and OutputBpp parameters, the
 * HCActive / HCBlank declarations and the start of the HCBlank assignment
 * (only its dml_ceil tail is visible) are missing from this listing.
 */
1666 double dml32_RequiredDTBCLK(
1669 enum output_format_class OutputFormat,
1671 unsigned int DSCSlices,
1672 unsigned int HTotal,
1673 unsigned int HActive,
1674 unsigned int AudioRate,
1675 unsigned int AudioLayout)
1677 double PixelWordRate;
1680 double AverageTribyteRate;
1681 double HActiveTribyteRate;
// Non-DSC case: simple scaling of the pixel clock with a floor of 25.
1683 if (DSCEnable != true)
1684 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1686 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
// NOTE(review): HActive and DSCSlices are both unsigned int, so HActive / DSCSlices
// is an integer division that truncates before dml_ceil sees it; the inner
// dml_ceil(..., 1) therefore has no effect. A floating-point division may have
// been intended -- confirm.
1687 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1688 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
// Tail of the HCBlank assignment: audio term, with factor 1 for AudioLayout == 1 and 0.25 otherwise.
1690 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1691 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1692 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1693 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
/*
 * dml32_DSCDelayRequirement() - DSC pipeline delay for one output.
 *
 * When DSCEnabled is true and OutputBpp is non-zero, the base delay is
 * dml32_dscceComputeDelay(...) + dml32_dscComputeDelay(...), evaluated with
 *   - NumberOfDSCSlices / 4 slices and multiplied by 4 for 4:1 ODM,
 *   - NumberOfDSCSlices / 2 slices and multiplied by 2 for 2:1 ODM,
 *   - NumberOfDSCSlices slices otherwise.
 * The slice width passed in is always derived from HActive / NumberOfDSCSlices.
 * The delay is then extended by (HTotal - HActive) per line of delay and
 * rescaled by PixelClock / PixelClockBackEnd. Otherwise the delay is 0.
 * Returns DSCDelayRequirement_val.
 *
 * NOTE(review): the OutputBpp and PixelClock parameter declarations and the
 * else/brace lines are not visible in this listing -- confirm against the
 * full file.
 */
1696 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1697 enum odm_combine_mode ODMMode,
1698 unsigned int DSCInputBitPerComponent,
1700 unsigned int HActive,
1701 unsigned int HTotal,
1702 unsigned int NumberOfDSCSlices,
1703 enum output_format_class OutputFormat,
1704 enum output_encoder_class Output,
1706 double PixelClockBackEnd)
1708 unsigned int DSCDelayRequirement_val;
1710 if (DSCEnabled == true && OutputBpp != 0) {
// NOTE(review): HActive / NumberOfDSCSlices divides two unsigned ints, so the
// quotient is truncated before dml_ceil(..., 1) is applied (same in all three
// branches) -- confirm whether a floating-point division was intended.
1711 if (ODMMode == dm_odm_combine_mode_4to1) {
1712 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1713 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1714 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1715 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1716 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1717 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1718 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1720 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1721 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1722 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
// Add the horizontal blank (HTotal - HActive) for each line the delay spans.
// NOTE(review): DSCDelayRequirement_val / HActive is also an unsigned integer
// division, so a partial line is dropped rather than rounded up -- confirm.
1725 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1726 dml_ceil(DSCDelayRequirement_val / HActive, 1);
// Rescale by PixelClock / PixelClockBackEnd; storing the floating-point result
// into the unsigned int discards any fractional part.
1728 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
// DSC off or no output bpp: no delay.
1731 DSCDelayRequirement_val = 0;
1734 #ifdef __DML_VBA_DEBUG__
1735 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1736 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1737 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1738 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1739 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1740 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1741 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1744 return DSCDelayRequirement_val;
/*
 * dml32_CalculateSurfaceSizeInMall() - compute a per-surface MALL footprint
 * and report whether the static-screen surfaces exceed the MALL allocation.
 *
 * For every k in [0, NumberOfActiveSurfaces) SurfaceSizeInMALL[k] is built up
 * from:
 *   - a luma term (block-aligned width * block-aligned height, scaled per
 *     pixel),
 *   - a chroma term of the same shape, added only when ReadBlockWidthC[k] > 0,
 *   - when DCCEnable[k] is true, extra luma/chroma terms computed on a grid of
 *     8 * Read256BytesBlock{Width,Height} and divided by 256 (the chroma one
 *     only when Read256BytesBlockWidthC[k] > 0).
 *
 * Two alignment strategies are used, selected by ViewportStationary[k]:
 *   - stationary: the block-aligned span actually covered by the viewport
 *     (floor(start + size + block - 1) - floor(start)), capped by the
 *     block-aligned surface dimension;
 *   - otherwise: min(surface dimension, viewport dimension + block - 1),
 *     rounded up to a whole number of blocks.
 *
 * Finally the sizes of all surfaces whose UseMALLForStaticScreen[k] equals
 * dm_use_mall_static_screen_enable are summed and *ExceededMALLSize is set to
 * true when the sum is larger than MALLAllocatedForDCN * 1024 * 1024.
 *
 * NOTE(review): dml_ceil(a, g) / dml_floor(a, g) are assumed to round a up /
 * down to a multiple of g - confirm against dml_inline_defs.h.
 * NOTE(review): the * 1024 * 1024 scaling suggests MALLAllocatedForDCN is in
 * megabytes and SurfaceSizeInMALL[] in bytes - confirm with the caller.
 */
1747 void dml32_CalculateSurfaceSizeInMall(
1748 unsigned int NumberOfActiveSurfaces,
1749 unsigned int MALLAllocatedForDCN,
1750 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1752 bool ViewportStationary[],
1753 unsigned int ViewportXStartY[],
1754 unsigned int ViewportYStartY[],
1755 unsigned int ViewportXStartC[],
1756 unsigned int ViewportYStartC[],
1757 unsigned int ViewportWidthY[],
1758 unsigned int ViewportHeightY[],
1759 unsigned int BytesPerPixelY[],
1760 unsigned int ViewportWidthC[],
1761 unsigned int ViewportHeightC[],
1762 unsigned int BytesPerPixelC[],
1763 unsigned int SurfaceWidthY[],
1764 unsigned int SurfaceWidthC[],
1765 unsigned int SurfaceHeightY[],
1766 unsigned int SurfaceHeightC[],
1767 unsigned int Read256BytesBlockWidthY[],
1768 unsigned int Read256BytesBlockWidthC[],
1769 unsigned int Read256BytesBlockHeightY[],
1770 unsigned int Read256BytesBlockHeightC[],
1771 unsigned int ReadBlockWidthY[],
1772 unsigned int ReadBlockWidthC[],
1773 unsigned int ReadBlockHeightY[],
1774 unsigned int ReadBlockHeightC[],
/* Outputs: per-surface size and the overall "does not fit" flag. */
1777 unsigned int SurfaceSizeInMALL[],
1778 bool *ExceededMALLSize)
1780 unsigned int TotalSurfaceSizeInMALL = 0;
1783 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * Stationary viewport: size the block-aligned window the viewport actually
 * touches, never more than the block-aligned surface itself.
 */
1784 if (ViewportStationary[k]) {
1785 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1786 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1787 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1788 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1789 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1790 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1791 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
/* A zero chroma read-block width is treated as "no chroma plane". */
1793 if (ReadBlockWidthC[k] > 0) {
1794 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1795 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1796 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1797 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1798 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1799 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1800 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1801 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1802 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
/*
 * With DCC enabled, add a second contribution computed on a grid of
 * 8 x the 256-byte block dimensions and scaled by BytesPerPixel / 256.
 */
1805 if (DCCEnable[k] == true) {
1806 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1807 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1808 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1809 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1810 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1811 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1812 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1813 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1814 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1815 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1816 if (Read256BytesBlockWidthC[k] > 0) {
1817 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818 dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1819 Read256BytesBlockWidthC[k]),
1820 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1821 * Read256BytesBlockWidthC[k] - 1, 8 *
1822 Read256BytesBlockWidthC[k]) -
1823 dml_floor(ViewportXStartC[k], 8 *
1824 Read256BytesBlockWidthC[k])) *
1825 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1826 Read256BytesBlockHeightC[k]),
1827 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1828 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1829 Read256BytesBlockHeightC[k]) -
1830 dml_floor(ViewportYStartC[k], 8 *
1831 Read256BytesBlockHeightC[k])) *
1832 BytesPerPixelC[k] / 256;
/*
 * Alternative sizing that does not use the viewport start position: take
 * min(surface, viewport + one block - 1) and round it up to whole blocks.
 */
1836 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1837 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1838 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1839 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1841 if (ReadBlockWidthC[k] > 0) {
1842 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1843 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1844 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1845 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1846 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
/* Same DCC contribution as above, using the position-independent rounding. */
1849 if (DCCEnable[k] == true) {
1850 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1851 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1852 Read256BytesBlockWidthY[k] - 1), 8 *
1853 Read256BytesBlockWidthY[k]) *
1854 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1855 Read256BytesBlockHeightY[k] - 1), 8 *
1856 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1858 if (Read256BytesBlockWidthC[k] > 0) {
1859 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1860 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1861 Read256BytesBlockWidthC[k] - 1), 8 *
1862 Read256BytesBlockWidthC[k]) *
1863 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1864 Read256BytesBlockHeightC[k] - 1), 8 *
1865 Read256BytesBlockHeightC[k]) *
1866 BytesPerPixelC[k] / 256;
/* Only surfaces explicitly enabled for static-screen MALL count toward the total. */
1872 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1873 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1874 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
/* true when the accumulated size is strictly larger than the allocation. */
1876 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1877 } // CalculateSurfaceSizeInMall
/*
 * dml32_CalculateVMRowAndSwath() - per-surface page-table / DCC-meta row
 * sizing, prefetch line counts and row bandwidth.
 *
 * Works in four phases over the NumberOfActiveSurfaces surfaces:
 *
 *  1. Per surface: choose vm_group_bytes[] / dpte_group_bytes[] from the
 *     HostVM / GPUVM enables, split the PTE request buffer between luma and
 *     chroma, and call dml32_CalculateVMAndRowBytes() and
 *     dml32_CalculatePrefetchSourceLines() for chroma (only for the
 *     dm_420_8 / dm_420_10 / dm_420_12 / dm_rgbe_alpha formats) and for luma.
 *     The results are combined into PDEAndMetaPTEBytesFrame[] and
 *     MetaRowByte[], and the PTE buffer fit is recorded both for the normal
 *     layout (PTEBufferSizeNotExceeded[]) and for the "one row per frame"
 *     layout (local one_row_per_frame_fits_in_buffer[]).
 *  2. dml32_CalculateMALLUseForStaticScreen() fills UsesMALLForStaticScreen[].
 *  3. Per surface: derive PTE_BUFFER_MODE[] and BIGK_FRAGMENT_SIZE[].
 *  4. Per surface: decide use_one_row_for_frame[] / use_one_row_for_frame_flip[],
 *     substitute the one-row-per-frame values where selected, check the DCC
 *     meta buffer, total PixelPTEBytesPerRow[] and call
 *     dml32_CalculateRowBandwidth().
 *
 * All array parameters are indexed by surface; local scratch arrays are sized
 * DC__NUM_DPP__MAX.
 */
1879 void dml32_CalculateVMRowAndSwath(
1880 unsigned int NumberOfActiveSurfaces,
1882 unsigned int SurfaceSizeInMALL[],
1883 unsigned int PTEBufferSizeInRequestsLuma,
1884 unsigned int PTEBufferSizeInRequestsChroma,
1885 unsigned int DCCMetaBufferSizeBytes,
1886 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1887 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1888 unsigned int MALLAllocatedForDCN,
1889 double SwathWidthY[],
1890 double SwathWidthC[],
1893 unsigned int HostVMMaxNonCachedPageTableLevels,
1894 unsigned int GPUVMMaxPageTableLevels,
1895 unsigned int GPUVMMinPageSizeKBytes[],
1896 unsigned int HostVMMinPageSize,
/* Everything from here down is written by this function. */
1899 bool PTEBufferSizeNotExceeded[],
1900 bool DCCMetaBufferSizeNotExceeded[],
1901 unsigned int dpte_row_width_luma_ub[],
1902 unsigned int dpte_row_width_chroma_ub[],
1903 unsigned int dpte_row_height_luma[],
1904 unsigned int dpte_row_height_chroma[],
1905 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1906 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1907 unsigned int meta_req_width[],
1908 unsigned int meta_req_width_chroma[],
1909 unsigned int meta_req_height[],
1910 unsigned int meta_req_height_chroma[],
1911 unsigned int meta_row_width[],
1912 unsigned int meta_row_width_chroma[],
1913 unsigned int meta_row_height[],
1914 unsigned int meta_row_height_chroma[],
1915 unsigned int vm_group_bytes[],
1916 unsigned int dpte_group_bytes[],
1917 unsigned int PixelPTEReqWidthY[],
1918 unsigned int PixelPTEReqHeightY[],
1919 unsigned int PTERequestSizeY[],
1920 unsigned int PixelPTEReqWidthC[],
1921 unsigned int PixelPTEReqHeightC[],
1922 unsigned int PTERequestSizeC[],
1923 unsigned int dpde0_bytes_per_frame_ub_l[],
1924 unsigned int meta_pte_bytes_per_frame_ub_l[],
1925 unsigned int dpde0_bytes_per_frame_ub_c[],
1926 unsigned int meta_pte_bytes_per_frame_ub_c[],
1927 double PrefetchSourceLinesY[],
1928 double PrefetchSourceLinesC[],
1929 double VInitPreFillY[],
1930 double VInitPreFillC[],
1931 unsigned int MaxNumSwathY[],
1932 unsigned int MaxNumSwathC[],
1933 double meta_row_bw[],
1934 double dpte_row_bw[],
1935 double PixelPTEBytesPerRow[],
1936 double PDEAndMetaPTEBytesFrame[],
1937 double MetaRowByte[],
1938 bool use_one_row_for_frame[],
1939 bool use_one_row_for_frame_flip[],
1940 bool UsesMALLForStaticScreen[],
1941 bool PTE_BUFFER_MODE[],
1942 unsigned int BIGK_FRAGMENT_SIZE[])
/* Per-surface scratch: luma/chroma intermediates and one-row-per-frame variants. */
1945 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1946 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1947 unsigned int PDEAndMetaPTEBytesFrameY;
1948 unsigned int PDEAndMetaPTEBytesFrameC;
1949 unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1950 unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1951 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1952 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1953 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1954 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1955 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1956 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1957 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1958 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1959 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
/* Phase 1: per-surface VM/row byte counts and prefetch line counts. */
1961 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * Group sizes: 512/512 with HostVM; with GPUVM only, vm_group_bytes is 2048
 * and dpte_group_bytes is 512 for >= 64K pages on a vertical rotation and
 * 2048 in the other GPUVM case; 0/0 when no VM is in use.
 */
1962 if (HostVMEnable == true) {
1963 vm_group_bytes[k] = 512;
1964 dpte_group_bytes[k] = 512;
1965 } else if (GPUVMEnable == true) {
1966 vm_group_bytes[k] = 2048;
1967 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1968 dpte_group_bytes[k] = 512;
1970 dpte_group_bytes[k] = 2048;
1972 vm_group_bytes[k] = 0;
1973 dpte_group_bytes[k] = 0;
/* Formats that get a separate chroma pass through the VM/row-byte helpers. */
1976 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1977 myPipe[k].SourcePixelFormat == dm_420_12 ||
1978 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
/*
 * 10/12-bit 4:2:0 on a non-vertical rotation shares the combined PTE buffer
 * evenly between luma and chroma; the assignments that follow give each
 * plane its own configured size instead.
 */
1979 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1980 !IsVertical(myPipe[k].SourceRotation)) {
1981 PTEBufferSizeInRequestsForLuma[k] =
1982 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1983 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
1985 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1986 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
/* Chroma plane: page-table / meta bytes and row geometry. */
1989 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1990 myPipe[k].ViewportStationary,
1991 myPipe[k].DCCEnable,
1992 myPipe[k].DPPPerSurface,
1993 myPipe[k].BlockHeight256BytesC,
1994 myPipe[k].BlockWidth256BytesC,
1995 myPipe[k].SourcePixelFormat,
1996 myPipe[k].SurfaceTiling,
1997 myPipe[k].BytePerPixelC,
1998 myPipe[k].SourceRotation,
2000 myPipe[k].ViewportHeightChroma,
2001 myPipe[k].ViewportXStartC,
2002 myPipe[k].ViewportYStartC,
2005 HostVMMaxNonCachedPageTableLevels,
2006 GPUVMMaxPageTableLevels,
2007 GPUVMMinPageSizeKBytes[k],
2009 PTEBufferSizeInRequestsForChroma[k],
2011 myPipe[k].DCCMetaPitchC,
2012 myPipe[k].BlockWidthC,
2013 myPipe[k].BlockHeightC,
2017 &PixelPTEBytesPerRowC[k],
2018 &dpte_row_width_chroma_ub[k],
2019 &dpte_row_height_chroma[k],
2020 &dpte_row_height_linear_chroma[k],
2021 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2022 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2023 &dpte_row_height_chroma_one_row_per_frame[k],
2024 &meta_req_width_chroma[k],
2025 &meta_req_height_chroma[k],
2026 &meta_row_width_chroma[k],
2027 &meta_row_height_chroma[k],
2028 &PixelPTEReqWidthC[k],
2029 &PixelPTEReqHeightC[k],
2030 &PTERequestSizeC[k],
2031 &dpde0_bytes_per_frame_ub_c[k],
2032 &meta_pte_bytes_per_frame_ub_c[k]);
/* Chroma plane: number of source lines to prefetch. */
2034 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2035 myPipe[k].VRatioChroma,
2036 myPipe[k].VTapsChroma,
2037 myPipe[k].InterlaceEnable,
2038 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2039 myPipe[k].SwathHeightC,
2040 myPipe[k].SourceRotation,
2041 myPipe[k].ViewportStationary,
2043 myPipe[k].ViewportHeightChroma,
2044 myPipe[k].ViewportXStartC,
2045 myPipe[k].ViewportYStartC,
/*
 * No separate chroma pass: luma receives the whole PTE buffer and the chroma
 * intermediates/outputs are zeroed so the sums below stay valid.
 */
2051 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2052 PTEBufferSizeInRequestsForChroma[k] = 0;
2053 PixelPTEBytesPerRowC[k] = 0;
2054 PDEAndMetaPTEBytesFrameC = 0;
2055 MetaRowByteC[k] = 0;
2056 MaxNumSwathC[k] = 0;
2057 PrefetchSourceLinesC[k] = 0;
2058 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2059 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2060 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
/* Luma plane: page-table / meta bytes and row geometry. */
2063 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2064 myPipe[k].ViewportStationary,
2065 myPipe[k].DCCEnable,
2066 myPipe[k].DPPPerSurface,
2067 myPipe[k].BlockHeight256BytesY,
2068 myPipe[k].BlockWidth256BytesY,
2069 myPipe[k].SourcePixelFormat,
2070 myPipe[k].SurfaceTiling,
2071 myPipe[k].BytePerPixelY,
2072 myPipe[k].SourceRotation,
2074 myPipe[k].ViewportHeight,
2075 myPipe[k].ViewportXStart,
2076 myPipe[k].ViewportYStart,
2079 HostVMMaxNonCachedPageTableLevels,
2080 GPUVMMaxPageTableLevels,
2081 GPUVMMinPageSizeKBytes[k],
2083 PTEBufferSizeInRequestsForLuma[k],
2085 myPipe[k].DCCMetaPitchY,
2086 myPipe[k].BlockWidthY,
2087 myPipe[k].BlockHeightY,
2091 &PixelPTEBytesPerRowY[k],
2092 &dpte_row_width_luma_ub[k],
2093 &dpte_row_height_luma[k],
2094 &dpte_row_height_linear_luma[k],
2095 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2096 &dpte_row_width_luma_ub_one_row_per_frame[k],
2097 &dpte_row_height_luma_one_row_per_frame[k],
2099 &meta_req_height[k],
2101 &meta_row_height[k],
2102 &PixelPTEReqWidthY[k],
2103 &PixelPTEReqHeightY[k],
2104 &PTERequestSizeY[k],
2105 &dpde0_bytes_per_frame_ub_l[k],
2106 &meta_pte_bytes_per_frame_ub_l[k]);
/* Luma plane: number of source lines to prefetch. */
2108 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2111 myPipe[k].InterlaceEnable,
2112 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2113 myPipe[k].SwathHeightY,
2114 myPipe[k].SourceRotation,
2115 myPipe[k].ViewportStationary,
2117 myPipe[k].ViewportHeight,
2118 myPipe[k].ViewportXStart,
2119 myPipe[k].ViewportYStart,
/* Per-surface totals are the luma + chroma sums. */
2125 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2126 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
/* A row fits when its PTE bytes do not exceed 64 bytes per buffered request. */
2128 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2129 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2130 PTEBufferSizeNotExceeded[k] = true;
2132 PTEBufferSizeNotExceeded[k] = false;
/* The one-row-per-frame check allows twice that budget (64 * 2 per request). */
2135 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2136 PTEBufferSizeInRequestsForLuma[k] &&
2137 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
/* Phase 2: resolve which surfaces actually use MALL for static screen. */
2140 dml32_CalculateMALLUseForStaticScreen(
2141 NumberOfActiveSurfaces,
2142 MALLAllocatedForDCN,
2143 UseMALLForStaticScreen, // mode
2145 one_row_per_frame_fits_in_buffer,
2147 UsesMALLForStaticScreen); // boolean
/* Phase 3: PTE buffer mode flag and fragment size per surface. */
2149 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2150 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2151 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2152 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2153 (GPUVMMinPageSizeKBytes[k] > 64);
/* log2 of (GPUVMMinPageSizeKBytes * 1024) minus 12, i.e. relative to 2^12. */
2154 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
/* Phase 4: apply the one-row-per-frame decision and compute row bandwidth. */
2157 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2158 #ifdef __DML_VBA_DEBUG__
2159 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2160 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
/*
 * Same conditions as PTE_BUFFER_MODE above, except that the large-page case
 * additionally requires a vertical rotation.
 */
2162 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2163 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2164 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2165 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
/* The flip variant is suppressed for the full-frame p-state MALL mode. */
2167 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2168 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
/* Replace the regular row geometry with the one-row-per-frame values. */
2170 if (use_one_row_for_frame[k]) {
2171 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2172 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2173 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2174 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2175 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2176 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2177 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
/* DCC meta rows fit when the combined meta row bytes do not exceed the buffer. */
2180 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2181 DCCMetaBufferSizeNotExceeded[k] = true;
2183 DCCMetaBufferSizeNotExceeded[k] = false;
/* Reported PTE bytes per row: luma + chroma, halved in one-row-per-frame mode. */
2185 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2186 if (use_one_row_for_frame[k])
2187 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
/* Line time passed to the bandwidth helper is HTotal / PixelClock. */
2189 dml32_CalculateRowBandwidth(
2191 myPipe[k].SourcePixelFormat,
2193 myPipe[k].VRatioChroma,
2194 myPipe[k].DCCEnable,
2195 myPipe[k].HTotal / myPipe[k].PixelClock,
2196 MetaRowByteY[k], MetaRowByteC[k],
2198 meta_row_height_chroma[k],
2199 PixelPTEBytesPerRowY[k],
2200 PixelPTEBytesPerRowC[k],
2201 dpte_row_height_luma[k],
2202 dpte_row_height_chroma[k],
2207 #ifdef __DML_VBA_DEBUG__
2208 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2209 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2210 __func__, k, use_one_row_for_frame_flip[k]);
2211 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2212 __func__, k, UseMALLForPStateChange[k]);
2213 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2214 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2215 __func__, k, dpte_row_width_luma_ub[k]);
2216 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2217 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2218 __func__, k, dpte_row_height_chroma[k]);
2219 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2220 __func__, k, dpte_row_width_chroma_ub[k]);
2221 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
/*
 * NOTE(review): PixelPTEBytesPerRow[] is declared double[] in this function's
 * signature but is printed with %d below - the specifier looks mismatched
 * (debug builds only).
 */
2222 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2223 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2224 __func__, k, PTEBufferSizeNotExceeded[k]);
2225 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2226 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2229 } // CalculateVMRowAndSwath
/*
 * dml32_CalculateVMAndRowBytes() - page-table and DCC-meta byte counts plus
 * PTE / meta row geometry for a single plane.
 *
 * dml32_CalculateVMRowAndSwath() calls this once for chroma and once for luma
 * of each surface.
 *
 * Computed in order:
 *   - HostVMDynamicLevels from HostVMMinPageSize (only when both GPUVM and
 *     HostVM are enabled; 0 otherwise);
 *   - *MetaRequestWidth / *MetaRequestHeight as 8 x the 256-byte block
 *     dimensions, then *meta_row_height / *meta_row_width / *MetaRowByte
 *     depending on tiling and rotation;
 *   - DCCMetaSurfaceBytes and from it *MetaPTEBytesFrame and MPDEBytesFrame;
 *   - *DPDE0BytesFrame and ExtraDPDEBytesFrame when GPUVM has more than one
 *     page-table level;
 *   - *PixelPTEReqWidth / *PixelPTEReqHeight / *PTERequestSize from tiling and
 *     GPUVMMinPageSizeKBytes;
 *   - the "one row per frame" row geometry;
 *   - *dpte_row_height / *dpte_row_width_ub / *PixelPTEBytesPerRow (and, for
 *     linear surfaces, *dpte_row_height_linear).
 *
 * Return: *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame +
 * ExtraDPDEBytesFrame, multiplied by (1 + 8 * HostVMDynamicLevels) when
 * HostVMEnable is true.
 */
2231 unsigned int dml32_CalculateVMAndRowBytes(
2232 bool ViewportStationary,
2234 unsigned int NumberOfDPPs,
2235 unsigned int BlockHeight256Bytes,
2236 unsigned int BlockWidth256Bytes,
2237 enum source_format_class SourcePixelFormat,
2238 unsigned int SurfaceTiling,
2239 unsigned int BytePerPixel,
2240 enum dm_rotation_angle SourceRotation,
2242 unsigned int ViewportHeight,
2243 unsigned int ViewportXStart,
2244 unsigned int ViewportYStart,
2247 unsigned int HostVMMaxNonCachedPageTableLevels,
2248 unsigned int GPUVMMaxPageTableLevels,
2249 unsigned int GPUVMMinPageSizeKBytes,
2250 unsigned int HostVMMinPageSize,
2251 unsigned int PTEBufferSizeInRequests,
2253 unsigned int DCCMetaPitch,
2254 unsigned int MacroTileWidth,
2255 unsigned int MacroTileHeight,
/* All pointer parameters below are outputs. */
2258 unsigned int *MetaRowByte,
2259 unsigned int *PixelPTEBytesPerRow,
2260 unsigned int *dpte_row_width_ub,
2261 unsigned int *dpte_row_height,
2262 unsigned int *dpte_row_height_linear,
2263 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2264 unsigned int *dpte_row_width_ub_one_row_per_frame,
2265 unsigned int *dpte_row_height_one_row_per_frame,
2266 unsigned int *MetaRequestWidth,
2267 unsigned int *MetaRequestHeight,
2268 unsigned int *meta_row_width,
2269 unsigned int *meta_row_height,
2270 unsigned int *PixelPTEReqWidth,
2271 unsigned int *PixelPTEReqHeight,
2272 unsigned int *PTERequestSize,
2273 unsigned int *DPDE0BytesFrame,
2274 unsigned int *MetaPTEBytesFrame)
2276 unsigned int MPDEBytesFrame;
2277 unsigned int DCCMetaSurfaceBytes;
2278 unsigned int ExtraDPDEBytesFrame;
2279 unsigned int PDEAndMetaPTEBytesFrame;
2280 unsigned int HostVMDynamicLevels = 0;
2281 unsigned int MacroTileSizeBytes;
2282 unsigned int vp_height_meta_ub;
2283 unsigned int vp_height_dpte_ub;
2284 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
/*
 * Larger host pages need fewer dynamic page-table levels: the full
 * non-cached level count below 2048, one fewer from 2048 up to (but not
 * including) 1048576, and two fewer in the remaining assignment; the
 * reduced counts are clamped at 0.
 */
2286 if (GPUVMEnable == true && HostVMEnable == true) {
2287 if (HostVMMinPageSize < 2048)
2288 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2289 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2290 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2292 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/* A meta request covers an 8 x 8 grid of 256-byte blocks. */
2295 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2296 *MetaRequestWidth = 8 * BlockWidth256Bytes;
/* Linear surfaces use a fixed meta row height of 32. */
2297 if (SurfaceTiling == dm_sw_linear) {
2298 *meta_row_height = 32;
2299 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2300 - dml_floor(ViewportXStart, *MetaRequestWidth);
/* Tiled, non-vertical rotation: meta rows run along the request width. */
2301 } else if (!IsVertical(SourceRotation)) {
2302 *meta_row_height = *MetaRequestHeight;
/* The exact aligned span is only used for a stationary viewport on one DPP. */
2303 if (ViewportStationary && NumberOfDPPs == 1) {
2304 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2305 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2307 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2309 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
/* Remaining case: request width and height swap roles. */
2311 *meta_row_height = *MetaRequestWidth;
2312 if (ViewportStationary && NumberOfDPPs == 1) {
2313 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2314 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2316 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2318 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
/* Viewport height rounded to a granularity of 64 x the 256-byte block height. */
2321 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2322 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2323 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2324 } else if (!IsVertical(SourceRotation)) {
2325 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2327 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2330 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
/*
 * With GPUVM: 64 bytes per (8 * 4096)-byte chunk of meta surface beyond the
 * first 4096 bytes, plus one; MPDEBytesFrame is 128 bytes per level above
 * the first.
 */
2332 if (GPUVMEnable == true) {
2333 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2334 (8 * 4.0 * 1024), 1) + 1) * 64;
2335 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2337 *MetaPTEBytesFrame = 0;
/* Without DCC there is no meta surface to map. */
2341 if (DCCEnable != true) {
2342 *MetaPTEBytesFrame = 0;
2347 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
/* PDE bytes only exist with GPUVM and more than one page-table level. */
2349 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2352 MacroTileHeight - 1, MacroTileHeight) -
2353 dml_floor(ViewportYStart, MacroTileHeight);
2354 } else if (!IsVertical(SourceRotation)) {
2355 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2357 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
/*
 * NOTE(review): if Pitch is an unsigned integer type, the subtraction of
 * MacroTileSizeBytes can wrap when the product is smaller, and the division
 * by (8 * 2097152) truncates before dml_ceil - TODO confirm operand types.
 */
2359 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2360 (8 * 2097152), 1) + 1);
2361 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2363 *DPDE0BytesFrame = 0;
2364 ExtraDPDEBytesFrame = 0;
2365 vp_height_dpte_ub = 0;
2368 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2370 #ifdef __DML_VBA_DEBUG__
2371 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2372 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2373 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2374 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2375 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2376 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2377 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2378 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2379 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2380 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2381 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2382 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2383 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2384 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2385 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
/* NOTE(review): confirm SwathWidth is an integer type; it is printed with %d. */
2386 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2387 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
/* Each dynamic HostVM level adds 8x the base amount. */
2390 if (HostVMEnable == true)
2391 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
/*
 * PTE request shape: linear surfaces use a one-line-high request; 4K pages
 * use 16 x 16 256-byte blocks with a 128-byte request; the remaining case is
 * one macro tile high with a 64-byte request.
 */
2393 if (SurfaceTiling == dm_sw_linear) {
2394 *PixelPTEReqHeight = 1;
2395 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2396 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2397 *PTERequestSize = 64;
2398 } else if (GPUVMMinPageSizeKBytes == 4) {
2399 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2400 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2401 *PTERequestSize = 128;
2403 *PixelPTEReqHeight = MacroTileHeight;
2404 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2405 *PTERequestSize = 64;
2407 #ifdef __DML_VBA_DEBUG__
2408 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2409 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2410 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2411 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2412 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2413 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2414 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
/*
 * One-row-per-frame layout: the row height is the whole aligned viewport
 * height computed above (0 when the PDE branch was not taken).
 */
2417 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2418 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2419 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2420 (double) *PixelPTEReqWidth;
2421 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
/*
 * Linear: row height is the largest power of two such that a row still fits
 * the PTE buffer, capped at 128.
 * NOTE(review): if the integer quotient PTEBufferSizeInRequests *
 * *PixelPTEReqWidth / Pitch is 0, dml_log2() is evaluated at 0 and the shift
 * count is unspecified - TODO confirm callers rule this out.
 */
2424 if (SurfaceTiling == dm_sw_linear) {
2425 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2426 *PixelPTEReqWidth / Pitch), 1));
2427 #ifdef __DML_VBA_DEBUG__
2428 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2429 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2430 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2431 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2432 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2433 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2434 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2435 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2436 *PixelPTEReqWidth / Pitch), 1));
2437 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2439 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2440 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2441 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2443 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2444 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2445 PixelPTEReqWidth_linear / Pitch), 1);
2446 if (*dpte_row_height_linear > 128)
2447 *dpte_row_height_linear = 128;
/* Tiled, non-vertical rotation: a PTE row is one request high. */
2449 } else if (!IsVertical(SourceRotation)) {
2450 *dpte_row_height = *PixelPTEReqHeight;
/* Pages larger than 64K size the row from the full pitch rather than the swath. */
2452 if (GPUVMMinPageSizeKBytes > 64) {
2453 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2454 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2455 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2456 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2457 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2458 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2460 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2464 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
/* Remaining case: row height is bounded by the macro tile width. */
2466 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2468 if (ViewportStationary && (NumberOfDPPs == 1)) {
2469 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2470 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2472 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2473 * *PixelPTEReqHeight;
2476 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* No PTE traffic without GPUVM; HostVM scales it like the frame bytes above. */
2479 if (GPUVMEnable != true)
2480 *PixelPTEBytesPerRow = 0;
2481 if (HostVMEnable == true)
2482 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2484 #ifdef __DML_VBA_DEBUG__
2485 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2486 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2487 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2488 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2489 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2490 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2491 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2492 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2493 __func__, *dpte_row_width_ub_one_row_per_frame);
2494 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2495 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2496 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2497 *MetaPTEBytesFrame);
2500 return PDEAndMetaPTEBytesFrame;
2501 } // CalculateVMAndRowBytes
/*
 * dml32_CalculatePrefetchSourceLines() - number of source lines to fetch
 * during prefetch for one plane.
 *
 * Writes *VInitPreFill (derived from VRatio, VTaps and the interlace
 * settings) and *MaxNumSwath, then computes
 *
 *     numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath
 *
 * which is the value reported as "Prefetch source lines" in the debug trace.
 * The function is declared to return double; presumably it returns numLines -
 * TODO confirm.
 *
 * For a stationary viewport the swath counts account for where the (rotated)
 * viewport start falls inside a swath; the other formulas depend only on
 * *VInitPreFill and SwathHeight.
 */
2503 double dml32_CalculatePrefetchSourceLines(
2507 bool ProgressiveToInterlaceUnitInOPP,
2508 unsigned int SwathHeight,
2509 enum dm_rotation_angle SourceRotation,
2510 bool ViewportStationary,
2512 unsigned int ViewportHeight,
2513 unsigned int ViewportXStart,
2514 unsigned int ViewportYStart,
/* Outputs. */
2517 double *VInitPreFill,
2518 unsigned int *MaxNumSwath)
2521 unsigned int vp_start_rot;
2522 unsigned int sw0_tmp;
2523 unsigned int MaxPartialSwath;
2526 #ifdef __DML_VBA_DEBUG__
2527 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2528 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2529 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2530 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2531 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2532 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
/*
 * Initial pre-fill line count: floor((VRatio + VTaps + 1) / 2); the second
 * form additionally adds Interlace * 0.5 * VRatio before halving.
 */
2534 if (ProgressiveToInterlaceUnitInOPP)
2535 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2537 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/* Stationary viewport: locate the first scanned line within its swath. */
2539 if (ViewportStationary) {
/* The start line in scan order depends on the rotation / mirror mode. */
2540 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2541 vp_start_rot = SwathHeight -
2542 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2543 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2544 vp_start_rot = ViewportXStart;
2545 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2546 vp_start_rot = SwathHeight -
2547 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2549 vp_start_rot = ViewportYStart;
/* Lines available in the first (possibly partial) swath. */
2551 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2552 if (sw0_tmp < *VInitPreFill)
2553 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
/* Partial swath is at least one line. */
2556 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
/* Position-independent estimate based only on *VInitPreFill and SwathHeight. */
2558 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2559 if (*VInitPreFill > 1)
2560 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2562 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2564 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2566 #ifdef __DML_VBA_DEBUG__
/*
 * NOTE(review): vp_start_rot is only assigned inside the ViewportStationary
 * branch above, yet it is printed unconditionally here - confirm it cannot
 * be read uninitialized in debug builds.
 */
2567 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
/*
 * NOTE(review): *VInitPreFill is a double but is printed with %d - the
 * specifier looks mismatched (debug builds only).
 */
2568 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2569 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2570 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2571 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2575 } // CalculatePrefetchSourceLines
/*
 * dml32_CalculateMALLUseForStaticScreen - pick the surfaces that use MALL
 * for static screen and report the choice in UsesMALLForStaticScreen[].
 *
 * Pass 1: every surface whose UseMALLForStaticScreen[k] equals
 * dm_use_mall_static_screen_enable is selected and its SurfaceSizeInMALL[k]
 * is added to the running total.
 * Pass 2: while a candidate exists, the smallest not-yet-selected surface
 * that is not set to dm_use_mall_static_screen_disable, has
 * one_row_per_frame_fits_in_buffer[k] set, and keeps the total within
 * MALLAllocatedForDCNFinal * 1024 * 1024 is added.
 *
 * NOTE(review): this excerpt does not show every line of the function
 * (e.g. the declaration of k, closing braces and #endif lines).
 * NOTE(review): the budget product is evaluated in unsigned int; presumably
 * MALLAllocatedForDCNFinal is a small MiB count -- confirm it cannot wrap.
 */
2577 void dml32_CalculateMALLUseForStaticScreen(
2578 unsigned int NumberOfActiveSurfaces,
2579 unsigned int MALLAllocatedForDCNFinal,
2580 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2581 unsigned int SurfaceSizeInMALL[],
2582 bool one_row_per_frame_fits_in_buffer[],
2585 bool UsesMALLForStaticScreen[])
2588 unsigned int SurfaceToAddToMALL;
2589 bool CanAddAnotherSurfaceToMALL;
2590 unsigned int TotalSurfaceSizeInMALL;
/* Pass 1: surfaces explicitly enabled for MALL are always counted. */
2592 TotalSurfaceSizeInMALL = 0;
2593 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2594 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2595 if (UsesMALLForStaticScreen[k])
2596 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2597 #ifdef __DML_VBA_DEBUG__
2598 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2599 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
/* Pass 2: greedily add the smallest remaining eligible surface each round. */
2603 SurfaceToAddToMALL = 0;
2604 CanAddAnotherSurfaceToMALL = true;
2605 while (CanAddAnotherSurfaceToMALL) {
2606 CanAddAnotherSurfaceToMALL = false;
2607 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/*
 * The last clause accepts the first eligible surface of the round and
 * afterwards only a strictly smaller one.
 */
2608 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2609 !UsesMALLForStaticScreen[k] &&
2610 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2611 one_row_per_frame_fits_in_buffer[k] &&
2612 (!CanAddAnotherSurfaceToMALL ||
2613 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2614 CanAddAnotherSurfaceToMALL = true;
2615 SurfaceToAddToMALL = k;
2616 #ifdef __DML_VBA_DEBUG__
2617 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2618 __func__, k, UseMALLForStaticScreen[k]);
/* Commit the round's winner; the loop ends once a round finds no candidate. */
2622 if (CanAddAnotherSurfaceToMALL) {
2623 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2624 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2626 #ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2628 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
/*
 * dml32_CalculateRowBandwidth - compute the meta-row and DPTE-row bandwidth
 * of one surface and store them in *meta_row_bw and *dpte_row_bw.
 *
 * Each term has the form VRatio * bytes_per_row / (row_height * LineTime).
 * For dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha a second term built
 * from the chroma inputs (VRatioChroma, *Chroma) is added; for the other
 * formats only the luma term is used.
 *
 * NOTE(review): DCCEnable, GPUVMEnable, VRatio and LineTime are used below
 * but their declarations are not visible in this excerpt; the bodies of the
 * "!= true" branches are not visible either.
 */
2635 void dml32_CalculateRowBandwidth(
2637 enum source_format_class SourcePixelFormat,
2639 double VRatioChroma,
2642 unsigned int MetaRowByteLuma,
2643 unsigned int MetaRowByteChroma,
2644 unsigned int meta_row_height_luma,
2645 unsigned int meta_row_height_chroma,
2646 unsigned int PixelPTEBytesPerRowLuma,
2647 unsigned int PixelPTEBytesPerRowChroma,
2648 unsigned int dpte_row_height_luma,
2649 unsigned int dpte_row_height_chroma,
2651 double *meta_row_bw,
2652 double *dpte_row_bw)
/* Meta row bandwidth: selected by DCCEnable and the pixel format. */
2654 if (DCCEnable != true) {
2656 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2657 SourcePixelFormat == dm_rgbe_alpha) {
2658 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2659 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2661 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
/* DPTE row bandwidth: same structure, selected by GPUVMEnable. */
2664 if (GPUVMEnable != true) {
2666 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2667 SourcePixelFormat == dm_rgbe_alpha) {
2668 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2669 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2671 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * dml32_CalculateUrgentLatency - combine the three urgent latency inputs.
 *
 * The base value is the maximum of UrgentLatencyPixelDataOnly,
 * UrgentLatencyPixelMixedWithVMData and UrgentLatencyVMDataOnly. When
 * DoUrgentLatencyAdjustment is true,
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * is added to it.
 *
 * NOTE(review): the FabricClock parameter, the declaration of ret and the
 * return statement are not visible in this excerpt.
 */
2675 double dml32_CalculateUrgentLatency(
2676 double UrgentLatencyPixelDataOnly,
2677 double UrgentLatencyPixelMixedWithVMData,
2678 double UrgentLatencyVMDataOnly,
2679 bool DoUrgentLatencyAdjustment,
2680 double UrgentLatencyAdjustmentFabricClockComponent,
2681 double UrgentLatencyAdjustmentFabricClockReference,
2686 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
2687 if (DoUrgentLatencyAdjustment == true) {
/* The adjustment is positive when FabricClock is below the reference. */
2688 ret = ret + UrgentLatencyAdjustmentFabricClockComponent *
2689 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * dml32_CalculateUrgentBurstFactor - derive the urgent burst factors for the
 * cursor, luma and chroma paths of one surface.
 *
 * For each path the buffer is converted to a time (lines * LineTime / VRatio).
 * If that time does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; otherwise the factor is
 *   buffer_time / (buffer_time - UrgentLatency).
 * Assignments of 1 to each factor are also present; the conditions that
 * select them are only partly visible in this excerpt.
 *
 * NOTE(review): LineTime, VRatio and some else/brace lines are not visible
 * in this excerpt.
 */
2694 void dml32_CalculateUrgentBurstFactor(
2695 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2696 unsigned int swath_width_luma_ub,
2697 unsigned int swath_width_chroma_ub,
2698 unsigned int SwathHeightY,
2699 unsigned int SwathHeightC,
2701 double UrgentLatency,
2702 double CursorBufferSize,
2703 unsigned int CursorWidth,
2704 unsigned int CursorBPP,
2707 double BytePerPixelInDETY,
2708 double BytePerPixelInDETC,
2709 unsigned int DETBufferSizeY,
2710 unsigned int DETBufferSizeC,
2712 double *UrgentBurstFactorCursor,
2713 double *UrgentBurstFactorLuma,
2714 double *UrgentBurstFactorChroma,
2715 bool *NotEnoughUrgentLatencyHiding)
2717 double LinesInDETLuma;
2718 double LinesInDETChroma;
2719 unsigned int LinesInCursorBuffer;
2720 double CursorBufferSizeInTime;
2721 double DETBufferSizeInTimeLuma;
2722 double DETBufferSizeInTimeChroma;
2724 *NotEnoughUrgentLatencyHiding = 0;
/* Cursor path: only evaluated when a cursor is present. */
2726 if (CursorWidth > 0) {
/*
 * Lines held by the cursor buffer, rounded down to a power of two:
 * CursorBufferSize * 1024 bytes divided by the bytes of one cursor line.
 */
2727 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2728 (CursorWidth * CursorBPP / 8.0)), 1.0);
2730 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2731 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2732 *NotEnoughUrgentLatencyHiding = 1;
2733 *UrgentBurstFactorCursor = 0;
2735 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2736 (CursorBufferSizeInTime - UrgentLatency);
2739 *UrgentBurstFactorCursor = 1;
/*
 * Luma path: for dm_use_mall_pstate_change_phantom_pipe a fixed 1024*1024
 * replaces DETBufferSizeY as the buffer size.
 */
2743 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2744 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
/* Only whole swaths count: the line count is floored to SwathHeightY. */
2747 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2748 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2749 *NotEnoughUrgentLatencyHiding = 1;
2750 *UrgentBurstFactorLuma = 0;
2752 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2755 *UrgentBurstFactorLuma = 1;
/* Chroma path: same computation, skipped for single-plane formats. */
2758 if (BytePerPixelInDETC > 0) {
2759 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2760 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2761 / swath_width_chroma_ub;
2764 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2765 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2766 *NotEnoughUrgentLatencyHiding = 1;
2767 *UrgentBurstFactorChroma = 0;
2769 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2770 / (DETBufferSizeInTimeChroma - UrgentLatency);
2773 *UrgentBurstFactorChroma = 1;
2776 } // CalculateUrgentBurstFactor
/*
 * dml32_CalculateDCFCLKDeepSleep - compute the deep-sleep DCFCLK and store
 * it in *DCFClkDeepSleep.
 *
 * Per surface, a line delivery time is computed for luma and chroma (from
 * the pixel clock when the vertical ratio is <= 1, otherwise from the scaler
 * throughput), and a per-surface clock is derived from
 * __DML_MIN_DCFCLK_FACTOR__ * swath bytes / delivery time, with a lower
 * bound of PixelClock[k] / 16.
 * The result is the maximum of 8.0,
 * __DML_MIN_DCFCLK_FACTOR__ * total read bandwidth / ReturnBusWidth and
 * every per-surface clock.
 *
 * NOTE(review): VRatio[], HRatio[], Dppclk[], the declaration of k and
 * several continuation/else/brace lines are not visible in this excerpt.
 */
2778 void dml32_CalculateDCFCLKDeepSleep(
2779 unsigned int NumberOfActiveSurfaces,
2780 unsigned int BytePerPixelY[],
2781 unsigned int BytePerPixelC[],
2783 double VRatioChroma[],
2784 double SwathWidthY[],
2785 double SwathWidthC[],
2786 unsigned int DPPPerSurface[],
2788 double HRatioChroma[],
2789 double PixelClock[],
2790 double PSCL_THROUGHPUT[],
2791 double PSCL_THROUGHPUT_CHROMA[],
2793 double ReadBandwidthLuma[],
2794 double ReadBandwidthChroma[],
2795 unsigned int ReturnBusWidth,
2798 double *DCFClkDeepSleep)
2801 double DisplayPipeLineDeliveryTimeLuma;
2802 double DisplayPipeLineDeliveryTimeChroma;
2803 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2804 double ReadBandwidth = 0.0;
2806 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
/* Luma line delivery time. */
2808 if (VRatio[k] <= 1) {
2809 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2812 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
/* Chroma line delivery time; zero when there is no chroma plane. */
2814 if (BytePerPixelC[k] == 0) {
2815 DisplayPipeLineDeliveryTimeChroma = 0;
2817 if (VRatioChroma[k] <= 1) {
2818 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2819 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2821 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
/*
 * Two-plane surfaces take the larger of the luma and chroma requirement
 * (divisor 32.0); single-plane surfaces use the luma requirement with
 * divisor 64.0.
 */
2826 if (BytePerPixelC[k] > 0) {
2827 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2828 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2829 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2830 32.0 / DisplayPipeLineDeliveryTimeChroma);
2832 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2833 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Never go below one sixteenth of the pixel clock. */
2835 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2837 #ifdef __DML_VBA_DEBUG__
2838 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2839 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
/* Total read bandwidth over all surfaces gives the bandwidth-based floor. */
2843 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2844 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2846 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2848 #ifdef __DML_VBA_DEBUG__
2849 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2850 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2851 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2852 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
/* Final value: raise the floor to the most demanding surface. */
2855 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2856 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2857 #ifdef __DML_VBA_DEBUG__
2858 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2860 } // CalculateDCFCLKDeepSleep
/*
 * dml32_CalculateWriteBackDelay - compute and return the writeback delay.
 *
 * WritebackVInit      = (WritebackVRatio + WritebackVTaps + 1) / 2
 * Line_length         = max(WritebackDestinationWidth,
 *                           ceil(WritebackDestinationWidth / 6) * WritebackVTaps)
 * Output_lines_last   = WritebackDestinationHeight - 1
 *                       - ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio)
 *
 * The result is 0 when Output_lines_last is negative, otherwise
 * Output_lines_last * Line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not referenced by the visible
 * lines.
 * NOTE(review): the declaration of Line_length and the else/brace lines are
 * not visible in this excerpt.
 */
2862 double dml32_CalculateWriteBackDelay(
2863 enum source_format_class WritebackPixelFormat,
2864 double WritebackHRatio,
2865 double WritebackVRatio,
2866 unsigned int WritebackVTaps,
2867 unsigned int WritebackDestinationWidth,
2868 unsigned int WritebackDestinationHeight,
2869 unsigned int WritebackSourceHeight,
2870 unsigned int HTotal)
2872 double CalculateWriteBackDelay;
2874 double Output_lines_last_notclamped;
2875 double WritebackVInit;
2877 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2878 Line_length = dml_max((double) WritebackDestinationWidth,
2879 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2880 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2881 dml_ceil(((double)WritebackSourceHeight -
2882 (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
/* A negative line count is clamped: the delay is then reported as 0. */
2883 if (Output_lines_last_notclamped < 0) {
2884 CalculateWriteBackDelay = 0;
2886 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2887 (HTotal - WritebackDestinationWidth) + 80;
2889 return CalculateWriteBackDelay;
/*
 * dml32_UseMinimumDCFCLK - for every voltage state i and every j in {0, 1},
 * compute the DCFCLK stored in DCFCLKState[i][j].
 *
 * Per (i, j) the function:
 *  1. sums the DPTE row bandwidth of all surfaces
 *     (TotalMaxPrefetchFlipDPTERowBandwidth) and derives the clock needed
 *     for average bandwidth, bounded below by ProjectedDCFClkDeepSleep[i][j];
 *  2. converts the extra latency bytes into DCFCLK cycles;
 *  3. derives, per surface, the clock needed for peak (prefetch) bandwidth,
 *     falling back to DCFCLKPerState[i] where the available time is not
 *     positive;
 *  4. sums the per-surface peak clocks and raises the sum where the
 *     Tvm + 2*Tr0 + Tsw window requires it;
 *  5. stores min(DCFCLKPerState[i], 1.05 * max(average, peak)).
 *
 * Every surface loop uses "k < NumberOfActiveSurfaces". The count and k are
 * unsigned, so a "k <= NumberOfActiveSurfaces - 1" bound wraps to UINT_MAX
 * when the count is 0 and would index far past the arrays.
 *
 * NOTE(review): several parameters used below (e.g. GPUVMEnable,
 * HostVMEnable, DRRDisplay, Interlace), some locals (TSetupPipe, TdmbfPipe,
 * TdmecPipe, TdmsksPipe), some call arguments and the else/brace lines are
 * not visible in this excerpt.
 */
2892 void dml32_UseMinimumDCFCLK(
2893 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2895 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2896 unsigned int MaxInterDCNTileRepeaters,
2897 unsigned int MaxPrefetchMode,
2898 double DRAMClockChangeLatencyFinal,
2899 double FCLKChangeLatency,
2900 double SREnterPlusExitTime,
2901 unsigned int ReturnBusWidth,
2902 unsigned int RoundTripPingLatencyCycles,
2903 unsigned int ReorderingBytes,
2904 unsigned int PixelChunkSizeInKByte,
2905 unsigned int MetaChunkSize,
2907 unsigned int GPUVMMaxPageTableLevels,
2909 unsigned int NumberOfActiveSurfaces,
2910 double HostVMMinPageSize,
2911 unsigned int HostVMMaxNonCachedPageTableLevels,
2912 bool DynamicMetadataVMEnabled,
2913 bool ImmediateFlipRequirement,
2914 bool ProgressiveToInterlaceUnitInOPP,
2915 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2916 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2917 unsigned int VTotal[],
2918 unsigned int VActive[],
2919 unsigned int DynamicMetadataTransmittedBytes[],
2920 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2922 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2923 double RequiredDISPCLK[][2],
2924 double UrgLatency[],
2925 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2926 double ProjectedDCFClkDeepSleep[][2],
2927 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2928 unsigned int TotalNumberOfActiveDPP[][2],
2929 unsigned int TotalNumberOfDCCActiveDPP[][2],
2930 unsigned int dpte_group_bytes[],
2931 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2932 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2933 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2934 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2935 unsigned int BytePerPixelY[],
2936 unsigned int BytePerPixelC[],
2937 unsigned int HTotal[],
2938 double PixelClock[],
2939 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2940 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2941 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2942 bool DynamicMetadataEnable[],
2943 double ReadBandwidthLuma[],
2944 double ReadBandwidthChroma[],
2945 double DCFCLKPerState[],
2947 double DCFCLKState[][2])
2949 unsigned int i, j, k;
2950 unsigned int dummy1;
2951 double dummy2, dummy3;
2952 double NormalEfficiency;
2953 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
/* Percentage input converted to a 0..1 style efficiency factor. */
2955 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2956 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2957 for (j = 0; j <= 1; ++j) {
2958 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2959 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2960 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2961 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2962 double MinimumTWait = 0.0;
2963 double DPTEBandwidth;
2964 double DCFCLKRequiredForAverageBandwidth;
2965 unsigned int ExtraLatencyBytes;
2966 double ExtraLatencyCycles;
2967 double DCFCLKRequiredForPeakBandwidth;
2968 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2969 double MinimumTvmPlus2Tr0;
/* Step 1: DPTE row bandwidth summed over all surfaces. */
2971 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2972 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2973 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2974 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2975 / (15.75 * HTotal[k] / PixelClock[k]);
/* Flatten NoOfDPP[i][j][] into a 1-D array for the helper call below. */
2978 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2979 NoOfDPPState[k] = NoOfDPP[i][j][k];
2981 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2982 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
/* Step 2: extra latency, first in bytes and then in DCFCLK cycles. */
2984 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2985 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2986 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2987 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2988 HostVMMaxNonCachedPageTableLevels);
2989 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2990 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
/* Step 3: per-surface peak bandwidth requirement. */
2991 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2992 double DCFCLKCyclesRequiredInPrefetch;
2993 double PrefetchTime;
2995 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2996 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2997 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2998 * BytePerPixelC[k]) / NormalEfficiency
3000 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3001 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3002 / NormalEfficiency / ReturnBusWidth
3003 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3004 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3006 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3007 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3008 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3009 * HTotal[k] / PixelClock[k];
3010 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3011 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3012 UrgLatency[i] * GPUVMMaxPageTableLevels *
3013 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3015 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3016 UseMALLForPStateChange[k],
3017 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3019 DRAMClockChangeLatencyFinal,
3022 SREnterPlusExitTime);
/* Time left for prefetch after the wait and page-table walk latencies. */
3024 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3025 MinimumTWait - UrgLatency[i] *
3026 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3027 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3028 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3029 DynamicMetadataVMExtraLatency[k];
3031 if (PrefetchTime > 0) {
3032 double ExpectedVRatioPrefetch;
3034 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3035 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3036 DCFCLKCyclesRequiredInPrefetch);
3037 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3038 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3039 PrefetchPixelLinesTime[k] *
3040 dml_max(1.0, ExpectedVRatioPrefetch) *
3041 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3042 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3043 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3044 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3045 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3046 NormalEfficiency / ReturnBusWidth;
/* Fallback assignment; its guarding else line is not visible here. */
3049 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
/* Dynamic metadata adds a bound from the time left for extra latency. */
3051 if (DynamicMetadataEnable[k] == true) {
3056 double AllowedTimeForUrgentExtraLatency;
3058 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3059 MaxInterDCNTileRepeaters,
3060 RequiredDPPCLKPerSurface[i][j][k],
3061 RequiredDISPCLK[i][j],
3062 ProjectedDCFClkDeepSleep[i][j],
3065 VTotal[k] - VActive[k],
3066 DynamicMetadataTransmittedBytes[k],
3067 DynamicMetadataLinesBeforeActiveRequired[k],
3069 ProgressiveToInterlaceUnitInOPP,
3079 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3080 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3081 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3082 if (AllowedTimeForUrgentExtraLatency > 0)
3083 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3084 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3085 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3087 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
/* Step 4: total peak requirement is the sum over all surfaces. */
3090 DCFCLKRequiredForPeakBandwidth = 0;
3091 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3092 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3093 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3095 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3096 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3097 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3098 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3099 double MaximumTvmPlus2Tr0PlusTsw;
3101 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3102 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
/* Window too small: fall back to the state's full DCFCLK. */
3103 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3104 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3106 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3107 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3108 MinimumTvmPlus2Tr0 -
3109 PrefetchPixelLinesTime[k] / 4),
3110 (2 * ExtraLatencyCycles +
3111 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3112 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/* Step 5: 5% margin on the larger requirement, capped at the state's DCFCLK. */
3115 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3116 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * dml32_CalculateExtraLatencyBytes - compute the extra latency byte count.
 *
 * Base value:
 *   ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *                      TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * When GPUVMEnable is true, each surface adds
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) *
 *   HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is derived from HostVMMinPageSize only when both
 * GPUVMEnable and HostVMEnable are true: the full
 * HostVMMaxNonCachedPageTableLevels below 2048, one level fewer below
 * 1048576, two fewer otherwise (never below 0). Another assignment sets
 * it to 0.
 *
 * NOTE(review): GPUVMEnable, HostVMEnable, the declarations of k and ret,
 * the return statement and else/brace lines are not visible in this excerpt.
 */
3121 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
3122 unsigned int TotalNumberOfActiveDPP,
3123 unsigned int PixelChunkSizeInKByte,
3124 unsigned int TotalNumberOfDCCActiveDPP,
3125 unsigned int MetaChunkSize,
3128 unsigned int NumberOfActiveSurfaces,
3129 unsigned int NumberOfDPP[],
3130 unsigned int dpte_group_bytes[],
3131 double HostVMInefficiencyFactor,
3132 double HostVMMinPageSize,
3133 unsigned int HostVMMaxNonCachedPageTableLevels)
3137 unsigned int HostVMDynamicLevels;
3139 if (GPUVMEnable == true && HostVMEnable == true) {
3140 if (HostVMMinPageSize < 2048)
3141 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
/* The ">= 2048" test is already implied by the failed check above. */
3142 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
/* Cast to int so the subtraction can go negative before clamping at 0. */
3143 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
3145 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
3147 HostVMDynamicLevels = 0;
3150 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
3151 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* Page-table traffic is only added when GPUVM is enabled. */
3153 if (GPUVMEnable == true) {
3154 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3155 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
3156 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - compute the VUpdate /
 * VReady pixel offsets and the dynamic-metadata timing terms.
 *
 * Outputs written by the visible lines:
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk, TotalRepeaterDelayTime +
 *                       20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                       / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0; the
 *                       other visible assignment uses that line count times
 *                       the line time. Halved again for interlaced output
 *                       when ProgressiveToInterlaceUnitInOPP is false.
 *
 * The debug prints use %f for *VUpdateWidthPix and *VReadyOffsetPix because
 * both are doubles; passing a double to %d is a format/argument mismatch.
 *
 * NOTE(review): Dppclk, Dispclk, PixelClock, the TSetup/Tdmbf/Tdmec/Tdmsks
 * output parameters, the left-hand side of the VUpdateWidthPix assignment
 * and else/brace/#endif lines are not visible in this excerpt.
 */
3162 void dml32_CalculateVUpdateAndDynamicMetadataParameters(
3163 unsigned int MaxInterDCNTileRepeaters,
3166 double DCFClkDeepSleep,
3168 unsigned int HTotal,
3169 unsigned int VBlank,
3170 unsigned int DynamicMetadataTransmittedBytes,
3171 unsigned int DynamicMetadataLinesBeforeActiveRequired,
3172 unsigned int InterlaceEnable,
3173 bool ProgressiveToInterlaceUnitInOPP,
3180 unsigned int *VUpdateOffsetPix,
3181 double *VUpdateWidthPix,
3182 double *VReadyOffsetPix)
3184 double TotalRepeaterDelayTime;
/* Delay through the inter-tile repeaters: 2 DPPCLK + 3 DISPCLK cycles each. */
3186 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
3188 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
3189 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
3190 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
3191 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
3192 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3193 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
3194 *Tdmec = HTotal / PixelClock;
/* With no explicit line requirement, half of the vertical blank is used. */
3196 if (DynamicMetadataLinesBeforeActiveRequired == 0)
3197 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3199 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3201 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
3202 *Tdmsks = *Tdmsks / 2;
3203 #ifdef __DML_VBA_DEBUG__
3204 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3205 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3206 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
3208 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
3209 __func__, DynamicMetadataLinesBeforeActiveRequired);
3210 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
3211 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
3212 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
3213 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
/*
 * dml32_CalculateTWait - select the wait time TWait from the prefetch mode
 * and the MALL p-state mode.
 *
 * Visible selection, in order:
 *  - PrefetchMode == 0, UseMALLForPStateChange is none of full_frame /
 *    sub_viewport / phantom_pipe, and not (SynchronizeDRRDisplaysFor... &&
 *    DRRDisplay): max(DRAMClockChangeLatency + UrgentLatency,
 *    SREnterPlusExitTime, UrgentLatency);
 *  - PrefetchMode <= 1 and not phantom_pipe:
 *    max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
 *  - PrefetchMode <= 2 and not phantom_pipe:
 *    max(SREnterPlusExitTime, UrgentLatency);
 *  - a further assignment sets TWait = UrgentLatency (its guarding else is
 *    not visible here).
 *
 * NOTE(review): the DRRDisplay parameter, the declaration of TWait and the
 * return statement are not visible in this excerpt.
 */
3217 double dml32_CalculateTWait(
3218 unsigned int PrefetchMode,
3219 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3220 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3222 double DRAMClockChangeLatency,
3223 double FCLKChangeLatency,
3224 double UrgentLatency,
3225 double SREnterPlusExitTime)
3229 if (PrefetchMode == 0 &&
3230 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3231 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3232 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3233 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3234 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3235 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3236 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3237 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3238 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3240 TWait = UrgentLatency;
3243 #ifdef __DML_VBA_DEBUG__
3244 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3245 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3250 // Function: get_return_bw_mbps
3251 // Megabyte per second
/*
 * dml32_get_return_bw_mbps - compute the return bandwidth from the SoC
 * bounding box and the given clocks.
 *
 * Three ideal bandwidths are formed (SDP port, fabric, DRAM), each is scaled
 * by a percentage from *soc, and the minimum of the three is taken. For the
 * DRAM term, VoltageLevel < 2 selects pct_ideal_dram_bw_after_urgent_strobe.
 * HostVMEnable chooses between the "pixel data only" result and the
 * "pixel mixed with VM data" result.
 *
 * NOTE(review): PixelDataOnlyReturnBW and PixelMixedWithVMDataReturnBW are
 * computed from textually identical expressions (both use
 * pct_ideal_dram_sdp_bw_after_urgent_pixel_only), so HostVMEnable currently
 * has no effect on the result. This looks like a copy-paste; confirm whether
 * the mixed case should use a different percentage field.
 * NOTE(review): the else line, the return statement and the #endif are not
 * visible in this excerpt.
 */
3252 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3253 const int VoltageLevel,
3254 const bool HostVMEnable,
3255 const double DCFCLK,
3256 const double FabricClock,
3257 const double DRAMSpeed)
3259 double ReturnBW = 0.;
3260 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3261 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3262 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
/* Each limit is the ideal bandwidth times a percentage; the smallest wins. */
3263 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3264 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3265 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3266 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3267 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3268 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3269 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3270 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3272 if (HostVMEnable != true)
3273 ReturnBW = PixelDataOnlyReturnBW;
3275 ReturnBW = PixelMixedWithVMDataReturnBW;
3277 #ifdef __DML_VBA_DEBUG__
3278 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3279 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3280 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3281 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3282 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3283 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3284 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3285 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3286 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3287 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3288 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3293 // Function: get_return_bw_mbps_vm_only
3294 // Megabyte per second
/*
 * dml32_get_return_bw_mbps_vm_only - compute and return the return bandwidth
 * for VM-data-only traffic.
 *
 * The result is the minimum of three scaled limits:
 *   SDP port: return_bus_width_bytes * DCFCLK * pct_ideal_sdp_bw_after_urgent
 *   fabric:   FabricClock * fabric_datapath_to_dcn_data_return_bytes *
 *             pct_ideal_sdp_bw_after_urgent
 *   DRAM:     DRAMSpeed * num_chans * dram_channel_width_bytes * pct, where
 *             pct is pct_ideal_dram_bw_after_urgent_strobe for
 *             VoltageLevel < 2 and
 *             pct_ideal_dram_sdp_bw_after_urgent_vm_only otherwise
 * each divided by 100.
 *
 * NOTE(review): the fabric term is scaled by the SDP percentage
 * (pct_ideal_sdp_bw_after_urgent), whereas dml32_get_return_bw_mbps scales
 * its fabric term by pct_ideal_fabric_bw_after_urgent -- confirm this is
 * intended.
 */
3295 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3296 const int VoltageLevel,
3297 const double DCFCLK,
3298 const double FabricClock,
3299 const double DRAMSpeed)
3301 double VMDataOnlyReturnBW = dml_min3(
3302 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3303 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3304 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3305 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3306 * (VoltageLevel < 2 ?
3307 soc->pct_ideal_dram_bw_after_urgent_strobe :
3308 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3309 #ifdef __DML_VBA_DEBUG__
3310 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3311 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3312 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3313 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3314 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3316 return VMDataOnlyReturnBW;
/*
 * dml32_CalculateExtraLatency - compute and return the extra latency.
 *
 * The byte count comes from dml32_CalculateExtraLatencyBytes(); the result is
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *   + ExtraLatencyBytes / ReturnBW
 *
 * NOTE(review): the DCFCLK and ReturnBW parameters, other parameters and
 * several arguments of the helper call are not visible in this excerpt.
 */
3319 double dml32_CalculateExtraLatency(
3320 unsigned int RoundTripPingLatencyCycles,
3321 unsigned int ReorderingBytes,
3323 unsigned int TotalNumberOfActiveDPP,
3324 unsigned int PixelChunkSizeInKByte,
3325 unsigned int TotalNumberOfDCCActiveDPP,
3326 unsigned int MetaChunkSize,
3330 unsigned int NumberOfActiveSurfaces,
3331 unsigned int NumberOfDPP[],
3332 unsigned int dpte_group_bytes[],
3333 double HostVMInefficiencyFactor,
3334 double HostVMMinPageSize,
3335 unsigned int HostVMMaxNonCachedPageTableLevels)
3337 double ExtraLatencyBytes;
3338 double ExtraLatency;
/* The helper returns unsigned int; the value is widened to double here. */
3340 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3342 TotalNumberOfActiveDPP,
3343 PixelChunkSizeInKByte,
3344 TotalNumberOfDCCActiveDPP,
3348 NumberOfActiveSurfaces,
3351 HostVMInefficiencyFactor,
3353 HostVMMaxNonCachedPageTableLevels);
/* Fixed cycle count converted by DCFCLK, plus the byte count over ReturnBW. */
3355 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3357 #ifdef __DML_VBA_DEBUG__
3358 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3359 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3360 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3361 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3362 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3365 return ExtraLatency;
3366 } // CalculateExtraLatency
3368 bool dml32_CalculatePrefetchSchedule(
3369 double HostVMInefficiencyFactor,
3371 unsigned int DSCDelay,
3372 double DPPCLKDelaySubtotalPlusCNVCFormater,
3373 double DPPCLKDelaySCL,
3374 double DPPCLKDelaySCLLBOnly,
3375 double DPPCLKDelayCNVCCursor,
3376 double DISPCLKDelaySubtotal,
3377 unsigned int DPP_RECOUT_WIDTH,
3378 enum output_format_class OutputFormat,
3379 unsigned int MaxInterDCNTileRepeaters,
3380 unsigned int VStartup,
3381 unsigned int MaxVStartup,
3382 unsigned int GPUVMPageTableLevels,
3385 unsigned int HostVMMaxNonCachedPageTableLevels,
3386 double HostVMMinPageSize,
3387 bool DynamicMetadataEnable,
3388 bool DynamicMetadataVMEnabled,
3389 int DynamicMetadataLinesBeforeActiveRequired,
3390 unsigned int DynamicMetadataTransmittedBytes,
3391 double UrgentLatency,
3392 double UrgentExtraLatency,
3394 unsigned int PDEAndMetaPTEBytesFrame,
3395 unsigned int MetaRowByte,
3396 unsigned int PixelPTEBytesPerRow,
3397 double PrefetchSourceLinesY,
3398 unsigned int SwathWidthY,
3399 unsigned int VInitPreFillY,
3400 unsigned int MaxNumSwathY,
3401 double PrefetchSourceLinesC,
3402 unsigned int SwathWidthC,
3403 unsigned int VInitPreFillC,
3404 unsigned int MaxNumSwathC,
3405 unsigned int swath_width_luma_ub,
3406 unsigned int swath_width_chroma_ub,
3407 unsigned int SwathHeightY,
3408 unsigned int SwathHeightC,
3411 double *DSTXAfterScaler,
3412 double *DSTYAfterScaler,
3413 double *DestinationLinesForPrefetch,
3414 double *PrefetchBandwidth,
3415 double *DestinationLinesToRequestVMInVBlank,
3416 double *DestinationLinesToRequestRowInVBlank,
3417 double *VRatioPrefetchY,
3418 double *VRatioPrefetchC,
3419 double *RequiredPrefetchPixDataBWLuma,
3420 double *RequiredPrefetchPixDataBWChroma,
3421 bool *NotEnoughTimeForDynamicMetadata,
3423 double *prefetch_vmrow_bw,
3427 unsigned int *VUpdateOffsetPix,
3428 double *VUpdateWidthPix,
3429 double *VReadyOffsetPix)
3431 bool MyError = false;
3432 unsigned int DPPCycles, DISPCLKCycles;
3433 double DSTTotalPixelsAfterScaler;
3435 double dst_y_prefetch_equ;
3436 double prefetch_bw_oto;
3439 double Tvm_oto_lines;
3440 double Tr0_oto_lines;
3441 double dst_y_prefetch_oto;
3442 double TimeForFetchingMetaPTE = 0;
3443 double TimeForFetchingRowInVBlank = 0;
3444 double LinesToRequestPrefetchPixelData = 0;
3445 unsigned int HostVMDynamicLevelsTrips;
3449 double Tvm_trips_rounded;
3450 double Tr0_trips_rounded;
3452 double Tpre_rounded;
3453 double prefetch_bw_equ;
3459 double prefetch_sw_bytes;
3462 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3464 double Tsw_est1 = 0;
3465 double Tsw_est3 = 0;
3467 if (GPUVMEnable == true && HostVMEnable == true)
3468 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3470 HostVMDynamicLevelsTrips = 0;
3471 #ifdef __DML_VBA_DEBUG__
3472 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3473 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3474 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3475 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3476 __func__, HostVMEnable, HostVMInefficiencyFactor);
3478 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3479 MaxInterDCNTileRepeaters,
3482 myPipe->DCFClkDeepSleep,
3486 DynamicMetadataTransmittedBytes,
3487 DynamicMetadataLinesBeforeActiveRequired,
3488 myPipe->InterlaceEnable,
3489 myPipe->ProgressiveToInterlaceUnitInOPP,
3500 LineTime = myPipe->HTotal / myPipe->PixelClock;
3501 trip_to_mem = UrgentLatency;
3502 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3504 if (DynamicMetadataVMEnabled == true)
3505 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3507 *Tdmdl = TWait + UrgentExtraLatency;
3509 #ifdef __DML_VBA_ALLOW_DELTA__
3510 if (DynamicMetadataEnable == false)
3514 if (DynamicMetadataEnable == true) {
3515 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3516 *NotEnoughTimeForDynamicMetadata = true;
3517 #ifdef __DML_VBA_DEBUG__
3518 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3519 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3521 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3522 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3524 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3528 *NotEnoughTimeForDynamicMetadata = false;
3531 *NotEnoughTimeForDynamicMetadata = false;
3534 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3535 GPUVMEnable == true ? TWait + Tvm_trips : 0);
3537 if (myPipe->ScalerEnabled)
3538 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3540 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3542 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3544 DISPCLKCycles = DISPCLKDelaySubtotal;
3546 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3549 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3550 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3552 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3553 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3554 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3555 myPipe->HActive / 2 : 0)
3556 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3558 #ifdef __DML_VBA_DEBUG__
3559 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3560 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3561 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3562 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3563 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
3564 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
3565 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
3566 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3567 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
3570 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3571 *DSTYAfterScaler = 1;
3573 *DSTYAfterScaler = 0;
3575 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3576 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3577 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3578 #ifdef __DML_VBA_DEBUG__
3579 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
3580 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3585 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3587 if (GPUVMEnable == true) {
3588 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3589 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3590 if (GPUVMPageTableLevels >= 3) {
3591 *Tno_bw = UrgentExtraLatency + trip_to_mem *
3592 (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3593 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3594 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3595 4.0 * LineTime; // VBA_ERROR
3596 *Tno_bw = UrgentExtraLatency;
3600 } else if (myPipe->DCCEnable == true) {
3601 Tvm_trips_rounded = LineTime / 4.0;
3602 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3605 Tvm_trips_rounded = LineTime / 4.0;
3606 Tr0_trips_rounded = LineTime / 2.0;
3609 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3610 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3612 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3613 || myPipe->SourcePixelFormat == dm_420_12) {
3614 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3616 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3619 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3620 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3621 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3622 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3624 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3625 min_Lsw = dml_max(min_Lsw, 1.0);
3626 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3628 if (GPUVMEnable == true) {
3631 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3634 Tvm_oto = LineTime / 4.0;
3636 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3639 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3640 (LineTime - Tvm_oto)/2.0,
3642 #ifdef __DML_VBA_DEBUG__
3643 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3644 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3645 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3646 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3647 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3650 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3652 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3653 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3654 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3656 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3657 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3659 #ifdef __DML_VBA_DEBUG__
3660 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3661 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3662 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3663 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3664 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3665 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3666 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3667 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3668 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3669 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3670 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3671 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3672 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3673 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3674 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3675 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3676 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3677 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3678 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3679 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3680 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3681 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3682 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3683 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3684 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3685 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3686 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3689 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3690 Tpre_rounded = dst_y_prefetch_equ * LineTime;
3691 #ifdef __DML_VBA_DEBUG__
3692 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3693 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3694 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3695 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3696 __func__, VStartup * LineTime);
3697 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3698 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3699 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3700 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3701 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3702 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3703 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3704 __func__, *DSTYAfterScaler);
3706 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3707 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3709 if (prefetch_sw_bytes < dep_bytes)
3710 prefetch_sw_bytes = 2 * dep_bytes;
3712 *PrefetchBandwidth = 0;
3713 *DestinationLinesToRequestVMInVBlank = 0;
3714 *DestinationLinesToRequestRowInVBlank = 0;
3715 *VRatioPrefetchY = 0;
3716 *VRatioPrefetchC = 0;
3717 *RequiredPrefetchPixDataBWLuma = 0;
3718 if (dst_y_prefetch_equ > 1) {
3719 double PrefetchBandwidth1;
3720 double PrefetchBandwidth2;
3721 double PrefetchBandwidth3;
3722 double PrefetchBandwidth4;
3724 if (Tpre_rounded - *Tno_bw > 0) {
3725 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3726 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3727 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3728 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3730 PrefetchBandwidth1 = 0;
3732 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3733 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3734 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3735 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3736 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3739 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3740 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3741 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3743 PrefetchBandwidth2 = 0;
3745 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3746 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3747 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3748 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3750 PrefetchBandwidth3 = 0;
3753 if (VStartup == MaxVStartup &&
3754 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3755 LineTime - Tvm_trips_rounded > 0) {
3756 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3757 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3760 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3761 PrefetchBandwidth4 = prefetch_sw_bytes /
3762 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3764 PrefetchBandwidth4 = 0;
3767 #ifdef __DML_VBA_DEBUG__
3768 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3769 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3770 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3771 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3772 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3773 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3774 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3775 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3776 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3783 if (PrefetchBandwidth1 > 0) {
3784 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3785 >= Tvm_trips_rounded
3786 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3787 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3796 if (PrefetchBandwidth2 > 0) {
3797 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3798 >= Tvm_trips_rounded
3799 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3800 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3809 if (PrefetchBandwidth3 > 0) {
3810 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3811 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3812 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3813 Tr0_trips_rounded) {
3823 prefetch_bw_equ = PrefetchBandwidth1;
3825 prefetch_bw_equ = PrefetchBandwidth2;
3827 prefetch_bw_equ = PrefetchBandwidth3;
3829 prefetch_bw_equ = PrefetchBandwidth4;
3831 #ifdef __DML_VBA_DEBUG__
3832 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3833 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3834 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3835 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3838 if (prefetch_bw_equ > 0) {
3839 if (GPUVMEnable == true) {
3840 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3841 HostVMInefficiencyFactor / prefetch_bw_equ,
3842 Tvm_trips, LineTime / 4);
3844 Tvm_equ = LineTime / 4;
3847 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3848 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3849 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3850 (LineTime - Tvm_equ) / 2, LineTime / 4);
3852 Tr0_equ = (LineTime - Tvm_equ) / 2;
3857 #ifdef __DML_VBA_DEBUG__
3858 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3863 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3864 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3865 TimeForFetchingMetaPTE = Tvm_oto;
3866 TimeForFetchingRowInVBlank = Tr0_oto;
3867 *PrefetchBandwidth = prefetch_bw_oto;
3869 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3870 TimeForFetchingMetaPTE = Tvm_equ;
3871 TimeForFetchingRowInVBlank = Tr0_equ;
3872 *PrefetchBandwidth = prefetch_bw_equ;
3875 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3877 *DestinationLinesToRequestRowInVBlank =
3878 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3880 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3881 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3883 #ifdef __DML_VBA_DEBUG__
3884 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3885 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3886 __func__, *DestinationLinesToRequestVMInVBlank);
3887 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3888 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3889 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3890 __func__, *DestinationLinesToRequestRowInVBlank);
3891 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3892 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3895 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3896 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3897 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3898 #ifdef __DML_VBA_DEBUG__
3899 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3900 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3901 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3903 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3904 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3906 dml_max((double) PrefetchSourceLinesY /
3907 LinesToRequestPrefetchPixelData,
3908 (double) MaxNumSwathY * SwathHeightY /
3909 (LinesToRequestPrefetchPixelData -
3910 (VInitPreFillY - 3.0) / 2.0));
3911 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3914 *VRatioPrefetchY = 0;
3916 #ifdef __DML_VBA_DEBUG__
3917 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3918 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3919 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3923 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3924 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3926 #ifdef __DML_VBA_DEBUG__
3927 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3928 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3929 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3931 if ((SwathHeightC > 4)) {
3932 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3934 dml_max(*VRatioPrefetchC,
3935 (double) MaxNumSwathC * SwathHeightC /
3936 (LinesToRequestPrefetchPixelData -
3937 (VInitPreFillC - 3.0) / 2.0));
3938 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3941 *VRatioPrefetchC = 0;
3943 #ifdef __DML_VBA_DEBUG__
3944 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3945 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3946 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3950 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3951 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3954 #ifdef __DML_VBA_DEBUG__
3955 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3956 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3957 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3958 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3959 __func__, *RequiredPrefetchPixDataBWLuma);
3961 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3962 LinesToRequestPrefetchPixelData
3963 * myPipe->BytePerPixelC
3964 * swath_width_chroma_ub / LineTime;
3967 #ifdef __DML_VBA_DEBUG__
3968 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3969 __func__, LinesToRequestPrefetchPixelData);
3971 *VRatioPrefetchY = 0;
3972 *VRatioPrefetchC = 0;
3973 *RequiredPrefetchPixDataBWLuma = 0;
3974 *RequiredPrefetchPixDataBWChroma = 0;
3976 #ifdef __DML_VBA_DEBUG__
3977 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3978 (double)LinesToRequestPrefetchPixelData * LineTime +
3979 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3980 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3981 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3982 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3983 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3984 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3985 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3986 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3987 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3988 PixelPTEBytesPerRow);
3992 #ifdef __DML_VBA_DEBUG__
3993 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3994 __func__, dst_y_prefetch_equ);
3999 double prefetch_vm_bw;
4000 double prefetch_row_bw;
4002 if (PDEAndMetaPTEBytesFrame == 0) {
4004 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
4005 #ifdef __DML_VBA_DEBUG__
4006 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4007 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4008 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4009 __func__, *DestinationLinesToRequestVMInVBlank);
4010 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4012 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4013 (*DestinationLinesToRequestVMInVBlank * LineTime);
4014 #ifdef __DML_VBA_DEBUG__
4015 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4020 #ifdef __DML_VBA_DEBUG__
4021 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4022 __func__, *DestinationLinesToRequestVMInVBlank);
4026 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4027 prefetch_row_bw = 0;
4028 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4029 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4030 (*DestinationLinesToRequestRowInVBlank * LineTime);
4032 #ifdef __DML_VBA_DEBUG__
4033 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4034 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4035 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4036 __func__, *DestinationLinesToRequestRowInVBlank);
4037 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4040 prefetch_row_bw = 0;
4042 #ifdef __DML_VBA_DEBUG__
4043 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4044 __func__, *DestinationLinesToRequestRowInVBlank);
4048 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4052 *PrefetchBandwidth = 0;
4053 TimeForFetchingMetaPTE = 0;
4054 TimeForFetchingRowInVBlank = 0;
4055 *DestinationLinesToRequestVMInVBlank = 0;
4056 *DestinationLinesToRequestRowInVBlank = 0;
4057 *DestinationLinesForPrefetch = 0;
4058 LinesToRequestPrefetchPixelData = 0;
4059 *VRatioPrefetchY = 0;
4060 *VRatioPrefetchC = 0;
4061 *RequiredPrefetchPixDataBWLuma = 0;
4062 *RequiredPrefetchPixDataBWChroma = 0;
4066 } // CalculatePrefetchSchedule
/*
 * dml32_CalculateFlipSchedule - per-pipe immediate-flip timing and bandwidth.
 *
 * Takes this pipe's proportional share of BandwidthAvailableForImmediateFlip,
 * derives the time needed to fetch the PDE/meta-PTE data and the time needed
 * to fetch one row of meta/DPTE data, converts both times into destination
 * lines, and reports the resulting flip bandwidth together with a
 * supported/unsupported verdict for the pipe.
 *
 * Results are written through the four pointer parameters at the end of the
 * list:
 *   DestinationLinesToRequestVMInImmediateFlip  - lines for the PDE/meta-PTE fetch
 *   DestinationLinesToRequestRowInImmediateFlip - lines for the meta/DPTE row fetch
 *   final_flip_bw                               - bandwidth implied by those line counts
 *   ImmediateFlipSupportedForPipe               - false when any limit checked at
 *                                                 the end of the function is hit
 *
 * HostVMMinPageSize is not referenced anywhere in the visible body.
 *
 * NOTE(review): the numeric prefixes embedded in these lines skip 4073, 4075,
 * 4083-4084 and 4086-4087 inside the parameter list, and GPUVMEnable,
 * HostVMEnable, DCCEnable, LineTime, VRatio and Tno_bw are read below without
 * a visible declaration. Presumably they are declared on the skipped lines;
 * confirm against the complete file before relying on the parameter order.
 */
4068 void dml32_CalculateFlipSchedule(
4069 double HostVMInefficiencyFactor,
4070 double UrgentExtraLatency,
4071 double UrgentLatency,
4072 unsigned int GPUVMMaxPageTableLevels,
4074 unsigned int HostVMMaxNonCachedPageTableLevels,
4076 double HostVMMinPageSize,
4077 double PDEAndMetaPTEBytesPerFrame,
4078 double MetaRowBytes,
4079 double DPTEBytesPerRow,
4080 double BandwidthAvailableForImmediateFlip,
4081 unsigned int TotImmediateFlipBytes,
4082 enum source_format_class SourcePixelFormat,
4085 double VRatioChroma,
4088 unsigned int dpte_row_height,
4089 unsigned int meta_row_height,
4090 unsigned int dpte_row_height_chroma,
4091 unsigned int meta_row_height_chroma,
4092 bool use_one_row_for_frame_flip,
/* Outputs: everything from here to the end of the list is written by this function. */
4095 double *DestinationLinesToRequestVMInImmediateFlip,
4096 double *DestinationLinesToRequestRowInImmediateFlip,
4097 double *final_flip_bw,
4098 bool *ImmediateFlipSupportedForPipe)
4100 double min_row_time = 0.0;
4101 unsigned int HostVMDynamicLevelsTrips;
4102 double TimeForFetchingMetaPTEImmediateFlip;
4103 double TimeForFetchingRowInVBlankImmediateFlip;
4104 double ImmediateFlipBW;
/*
 * Extra host-VM page-table trips are only counted when both GPUVM and HostVM
 * are enabled; the second assignment (0) is presumably the else branch, whose
 * keyword line is not visible here.
 */
4106 if (GPUVMEnable == true && HostVMEnable == true)
4107 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4109 HostVMDynamicLevelsTrips = 0;
4111 #ifdef __DML_VBA_DEBUG__
4112 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4113 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
/*
 * The timing math only runs when TotImmediateFlipBytes is non-zero, which
 * also keeps the divisions by TotImmediateFlipBytes below well defined.
 */
4116 if (TotImmediateFlipBytes > 0) {
/*
 * ImmediateFlipBW is this pipe's share of the available flip bandwidth,
 * weighted by its own byte count over TotImmediateFlipBytes. With
 * use_one_row_for_frame_flip the DPTE row bytes are counted twice.
 */
4117 if (use_one_row_for_frame_flip) {
4118 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4119 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4121 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4122 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
/*
 * PDE/meta-PTE fetch time with GPUVM on: the larger of the bandwidth-limited
 * transfer time (offset by Tno_bw) and the latency of the page-table walk.
 * dml_max3 takes a third operand that sits on a line not visible here.
 * The assignment of 0 is presumably the GPUVM-off branch.
 */
4124 if (GPUVMEnable == true) {
4125 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4126 HostVMInefficiencyFactor / ImmediateFlipBW,
4127 UrgentExtraLatency + UrgentLatency *
4128 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4131 TimeForFetchingMetaPTEImmediateFlip = 0;
/*
 * Row fetch time when either GPUVM or DCC is on: the largest of the
 * bandwidth-limited transfer time, the urgent-latency trips, and a quarter
 * of a line time. The assignment of 0 is presumably the branch taken when
 * neither is enabled.
 */
4133 if ((GPUVMEnable == true || DCCEnable == true)) {
4134 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4135 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4136 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4138 TimeForFetchingRowInVBlankImmediateFlip = 0;
/*
 * Convert both times to destination lines in quarter-line steps
 * (assuming dml_ceil(x, 1.0) rounds x up to a whole number - TODO confirm).
 */
4141 *DestinationLinesToRequestVMInImmediateFlip =
4142 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4143 *DestinationLinesToRequestRowInImmediateFlip =
4144 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
/*
 * final_flip_bw is recomputed from the rounded line counts: with GPUVM on it
 * is the larger of the VM and row bandwidths, otherwise the row bandwidth
 * alone. In the else-if, the GPUVMEnable term can never be true because the
 * preceding if already handled that case, so only DCCEnable matters there.
 * NOTE(review): no assignment to *final_flip_bw for the remaining cases is
 * visible here (prefixes 4154-4157 and 4162 are skipped) - confirm it is set
 * on every path.
 */
4146 if (GPUVMEnable == true) {
4147 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4148 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4149 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4150 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4151 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4152 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4153 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
/* Presumably the TotImmediateFlipBytes == 0 branch: zero times and line counts. */
4158 TimeForFetchingMetaPTEImmediateFlip = 0;
4159 TimeForFetchingRowInVBlankImmediateFlip = 0;
4160 *DestinationLinesToRequestVMInImmediateFlip = 0;
4161 *DestinationLinesToRequestRowInImmediateFlip = 0;
/*
 * min_row_time is the shortest time covered by one DPTE/meta row, i.e.
 * row height * LineTime / vertical ratio. For the three formats tested
 * here both the luma and chroma rows are considered; for all other formats
 * only the luma row heights with VRatio are used.
 * Within each group: GPUVM without DCC uses the DPTE row heights, DCC without
 * GPUVM uses the meta row heights, and the remaining statement takes the
 * minimum over both kinds of row.
 */
4165 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4166 if (GPUVMEnable == true && DCCEnable != true) {
4167 min_row_time = dml_min(dpte_row_height *
4168 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4169 } else if (GPUVMEnable != true && DCCEnable == true) {
4170 min_row_time = dml_min(meta_row_height *
4171 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4173 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4174 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4175 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4178 if (GPUVMEnable == true && DCCEnable != true) {
4179 min_row_time = dpte_row_height * LineTime / VRatio;
4180 } else if (GPUVMEnable != true && DCCEnable == true) {
4181 min_row_time = meta_row_height * LineTime / VRatio;
4184 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
/*
 * Verdict: the pipe is marked unsupported when the VM fetch needs 32 or more
 * lines, the row fetch needs 16 or more lines, or the combined time of one
 * VM fetch plus two row fetches fails a comparison whose right-hand side is
 * on a line not visible here (presumably min_row_time - TODO confirm).
 * Otherwise the pipe is marked supported.
 */
4188 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4189 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4191 *ImmediateFlipSupportedForPipe = false;
4193 *ImmediateFlipSupportedForPipe = true;
/* Debug-only dump of the enables and of every value computed above. */
4196 #ifdef __DML_VBA_DEBUG__
4197 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4198 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4199 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4200 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4201 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4202 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4203 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4204 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4205 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4206 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4207 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4209 } // CalculateFlipSchedule
4211 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4212 bool USRRetrainingRequiredFinal,
4213 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4214 unsigned int PrefetchMode,
4215 unsigned int NumberOfActiveSurfaces,
4216 unsigned int MaxLineBufferLines,
4217 unsigned int LineBufferSize,
4218 unsigned int WritebackInterfaceBufferSize,
4221 bool SynchronizeTimingsFinal,
4222 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4224 unsigned int dpte_group_bytes[],
4225 unsigned int meta_row_height[],
4226 unsigned int meta_row_height_chroma[],
4227 SOCParametersList mmSOCParameters,
4228 unsigned int WritebackChunkSize,
4230 double DCFClkDeepSleep,
4231 unsigned int DETBufferSizeY[],
4232 unsigned int DETBufferSizeC[],
4233 unsigned int SwathHeightY[],
4234 unsigned int SwathHeightC[],
4235 unsigned int LBBitPerPixel[],
4236 double SwathWidthY[],
4237 double SwathWidthC[],
4239 double HRatioChroma[],
4240 unsigned int VTaps[],
4241 unsigned int VTapsChroma[],
4243 double VRatioChroma[],
4244 unsigned int HTotal[],
4245 unsigned int VTotal[],
4246 unsigned int VActive[],
4247 double PixelClock[],
4248 unsigned int BlendingAndTiming[],
4249 unsigned int DPPPerSurface[],
4250 double BytePerPixelDETY[],
4251 double BytePerPixelDETC[],
4252 double DSTXAfterScaler[],
4253 double DSTYAfterScaler[],
4254 bool WritebackEnable[],
4255 enum source_format_class WritebackPixelFormat[],
4256 double WritebackDestinationWidth[],
4257 double WritebackDestinationHeight[],
4258 double WritebackSourceHeight[],
4259 bool UnboundedRequestEnabled,
4260 unsigned int CompressedBufferSizeInkByte,
4263 Watermarks *Watermark,
4264 enum clock_change_support *DRAMClockChangeSupport,
4265 double MaxActiveDRAMClockChangeLatencySupported[],
4266 unsigned int SubViewportLinesNeededInMALL[],
4267 enum dm_fclock_change_support *FCLKChangeSupport,
4268 double *MinActiveFCLKChangeLatencySupported,
4269 bool *USRRetrainingSupport,
4270 double ActiveDRAMClockChangeLatencyMargin[])
4272 unsigned int i, j, k;
4273 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4274 unsigned int DRAMClockChangeSupportNumber = 0;
4275 unsigned int LastSurfaceWithoutMargin;
4276 unsigned int DRAMClockChangeMethod = 0;
4277 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4278 double MinActiveFCLKChangeMargin = 0.;
4279 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4280 double ActiveClockChangeLatencyHidingY;
4281 double ActiveClockChangeLatencyHidingC;
4282 double ActiveClockChangeLatencyHiding;
4283 double EffectiveDETBufferSizeY;
4284 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4285 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4286 double TotalPixelBW = 0.0;
4287 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4288 double EffectiveLBLatencyHidingY;
4289 double EffectiveLBLatencyHidingC;
4290 double LinesInDETY[DC__NUM_DPP__MAX];
4291 double LinesInDETC[DC__NUM_DPP__MAX];
4292 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4293 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4294 double FullDETBufferingTimeY;
4295 double FullDETBufferingTimeC;
4296 double WritebackDRAMClockChangeLatencyMargin;
4297 double WritebackFCLKChangeLatencyMargin;
4298 double WritebackLatencyHiding;
4299 bool SameTimingForFCLKChange;
4301 unsigned int TotalActiveWriteback = 0;
4302 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4303 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4305 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4306 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4307 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4308 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4309 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4310 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4311 + 10 / DCFClkDeepSleep;
4312 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4313 + 10 / DCFClkDeepSleep;
4314 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4315 + 10 / DCFClkDeepSleep;
4316 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4317 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4319 #ifdef __DML_VBA_DEBUG__
4320 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4321 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4322 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4323 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4324 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4325 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4326 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4327 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4328 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4329 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4330 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4331 __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4335 TotalActiveWriteback = 0;
4336 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4337 if (WritebackEnable[k] == true)
4338 TotalActiveWriteback = TotalActiveWriteback + 1;
4341 if (TotalActiveWriteback <= 1) {
4342 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4344 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4345 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4347 if (USRRetrainingRequiredFinal)
4348 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4349 + mmSOCParameters.USRRetrainingLatency;
4351 if (TotalActiveWriteback <= 1) {
4352 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4353 + mmSOCParameters.WritebackLatency;
4354 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4355 + mmSOCParameters.WritebackLatency;
4357 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4358 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4359 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4360 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4363 if (USRRetrainingRequiredFinal)
4364 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4365 + mmSOCParameters.USRRetrainingLatency;
4367 if (USRRetrainingRequiredFinal)
4368 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4369 + mmSOCParameters.USRRetrainingLatency;
4371 #ifdef __DML_VBA_DEBUG__
4372 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4373 __func__, Watermark->WritebackDRAMClockChangeWatermark);
4374 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4375 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4376 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4377 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4380 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4381 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4382 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4385 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4387 LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4388 LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4391 #ifdef __DML_VBA_DEBUG__
4392 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4393 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize);
4394 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]);
4395 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]);
4396 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]);
4399 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4400 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4401 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4403 if (UnboundedRequestEnabled) {
4404 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4405 + CompressedBufferSizeInkByte * 1024
4406 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4407 / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
4410 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4411 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4412 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4414 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4415 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4417 if (NumberOfActiveSurfaces > 1) {
4418 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4419 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4420 / PixelClock[k] / VRatio[k];
4423 if (BytePerPixelDETC[k] > 0) {
4424 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4425 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4426 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4428 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4429 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4431 if (NumberOfActiveSurfaces > 1) {
4432 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4433 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4434 / PixelClock[k] / VRatioChroma[k];
4436 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4437 ActiveClockChangeLatencyHidingC);
4439 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4442 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4443 - Watermark->DRAMClockChangeWatermark;
4444 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4445 - Watermark->FCLKChangeWatermark;
4446 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4448 if (WritebackEnable[k]) {
4449 WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4450 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4451 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4452 if (WritebackPixelFormat[k] == dm_444_64)
4453 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4455 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4456 - Watermark->WritebackDRAMClockChangeWatermark;
4458 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4459 - Watermark->WritebackFCLKChangeWatermark;
4461 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4462 WritebackFCLKChangeLatencyMargin);
4463 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4464 WritebackDRAMClockChangeLatencyMargin);
4466 MaxActiveDRAMClockChangeLatencySupported[k] =
4467 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4469 (ActiveDRAMClockChangeLatencyMargin[k]
4470 + mmSOCParameters.DRAMClockChangeLatency);
4473 for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4474 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4476 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4477 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4478 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4479 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4480 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4481 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4482 (DRRDisplay[i] || DRRDisplay[j]))) {
4483 SynchronizedSurfaces[i][j] = true;
4485 SynchronizedSurfaces[i][j] = false;
4490 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4491 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4492 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4493 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4494 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4495 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4496 SurfaceWithMinActiveFCLKChangeMargin = k;
4500 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4502 SameTimingForFCLKChange = true;
4503 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4504 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4505 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4506 (SameTimingForFCLKChange ||
4507 ActiveFCLKChangeLatencyMargin[k] <
4508 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4509 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4511 SameTimingForFCLKChange = false;
4515 if (MinActiveFCLKChangeMargin > 0) {
4516 *FCLKChangeSupport = dm_fclock_change_vactive;
4517 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4518 (PrefetchMode <= 1)) {
4519 *FCLKChangeSupport = dm_fclock_change_vblank;
4521 *FCLKChangeSupport = dm_fclock_change_unsupported;
4524 *USRRetrainingSupport = true;
4525 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4526 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4527 (USRRetrainingLatencyMargin[k] < 0)) {
4528 *USRRetrainingSupport = false;
4532 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4533 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4534 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4535 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4536 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4537 if (PrefetchMode > 0) {
4538 DRAMClockChangeSupportNumber = 2;
4539 } else if (DRAMClockChangeSupportNumber == 0) {
4540 DRAMClockChangeSupportNumber = 1;
4541 LastSurfaceWithoutMargin = k;
4542 } else if (DRAMClockChangeSupportNumber == 1 &&
4543 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4544 DRAMClockChangeSupportNumber = 2;
4549 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4550 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4551 DRAMClockChangeMethod = 1;
4552 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4553 DRAMClockChangeMethod = 2;
4556 if (DRAMClockChangeMethod == 0) {
4557 if (DRAMClockChangeSupportNumber == 0)
4558 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4559 else if (DRAMClockChangeSupportNumber == 1)
4560 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4562 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4563 } else if (DRAMClockChangeMethod == 1) {
4564 if (DRAMClockChangeSupportNumber == 0)
4565 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4566 else if (DRAMClockChangeSupportNumber == 1)
4567 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4569 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4571 if (DRAMClockChangeSupportNumber == 0)
4572 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4573 else if (DRAMClockChangeSupportNumber == 1)
4574 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4576 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4579 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4580 unsigned int dst_y_pstate;
4581 unsigned int src_y_pstate_l;
4582 unsigned int src_y_pstate_c;
4583 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4585 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4586 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4587 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4588 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4590 #ifdef __DML_VBA_DEBUG__
4591 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
4592 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
4593 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
4594 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
4595 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4596 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
4597 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
4598 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
4599 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]);
4600 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
4602 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4604 if (BytePerPixelDETC[k] > 0) {
4605 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4606 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4607 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4608 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4610 #ifdef __DML_VBA_DEBUG__
4611 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
4612 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
4613 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]);
4614 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
4618 #ifdef __DML_VBA_DEBUG__
4619 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4620 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4621 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4622 __func__, *MinActiveFCLKChangeLatencySupported);
4623 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4625 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4627 double dml32_CalculateWriteBackDISPCLK(
4628 enum source_format_class WritebackPixelFormat,
4630 double WritebackHRatio,
4631 double WritebackVRatio,
4632 unsigned int WritebackHTaps,
4633 unsigned int WritebackVTaps,
4634 unsigned int WritebackSourceWidth,
4635 unsigned int WritebackDestinationWidth,
4636 unsigned int HTotal,
4637 unsigned int WritebackLineBufferSize,
4638 double DISPCLKDPPCLKVCOSpeed)
4640 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4642 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4643 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4644 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4645 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4646 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4649 void dml32_CalculateMinAndMaxPrefetchMode(
4650 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4651 unsigned int *MinPrefetchMode,
4652 unsigned int *MaxPrefetchMode)
4654 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4655 *MinPrefetchMode = 3;
4656 *MaxPrefetchMode = 3;
4657 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4658 *MinPrefetchMode = 2;
4659 *MaxPrefetchMode = 2;
4660 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4661 *MinPrefetchMode = 1;
4662 *MaxPrefetchMode = 1;
4663 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4664 *MinPrefetchMode = 0;
4665 *MaxPrefetchMode = 0;
4666 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4667 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4668 *MinPrefetchMode = 0;
4669 *MaxPrefetchMode = 3;
4671 *MinPrefetchMode = 0;
4672 *MaxPrefetchMode = 3;
4674 } // CalculateMinAndMaxPrefetchMode
4676 void dml32_CalculatePixelDeliveryTimes(
4677 unsigned int NumberOfActiveSurfaces,
4679 double VRatioChroma[],
4680 double VRatioPrefetchY[],
4681 double VRatioPrefetchC[],
4682 unsigned int swath_width_luma_ub[],
4683 unsigned int swath_width_chroma_ub[],
4684 unsigned int DPPPerSurface[],
4686 double HRatioChroma[],
4687 double PixelClock[],
4688 double PSCL_THROUGHPUT[],
4689 double PSCL_THROUGHPUT_CHROMA[],
4691 unsigned int BytePerPixelC[],
4692 enum dm_rotation_angle SourceRotation[],
4693 unsigned int NumberOfCursors[],
4694 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4695 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4696 unsigned int BlockWidth256BytesY[],
4697 unsigned int BlockHeight256BytesY[],
4698 unsigned int BlockWidth256BytesC[],
4699 unsigned int BlockHeight256BytesC[],
4702 double DisplayPipeLineDeliveryTimeLuma[],
4703 double DisplayPipeLineDeliveryTimeChroma[],
4704 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4705 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4706 double DisplayPipeRequestDeliveryTimeLuma[],
4707 double DisplayPipeRequestDeliveryTimeChroma[],
4708 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4709 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4710 double CursorRequestDeliveryTime[],
4711 double CursorRequestDeliveryTimePrefetch[])
4713 double req_per_swath_ub;
4716 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4718 #ifdef __DML_VBA_DEBUG__
4719 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4720 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4721 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4722 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4723 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4724 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4725 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4726 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4727 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4728 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4729 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4732 if (VRatio[k] <= 1) {
4733 DisplayPipeLineDeliveryTimeLuma[k] =
4734 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4736 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4739 if (BytePerPixelC[k] == 0) {
4740 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4742 if (VRatioChroma[k] <= 1) {
4743 DisplayPipeLineDeliveryTimeChroma[k] =
4744 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4746 DisplayPipeLineDeliveryTimeChroma[k] =
4747 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4751 if (VRatioPrefetchY[k] <= 1) {
4752 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4753 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4755 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4756 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4759 if (BytePerPixelC[k] == 0) {
4760 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4762 if (VRatioPrefetchC[k] <= 1) {
4763 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4764 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4766 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4767 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4770 #ifdef __DML_VBA_DEBUG__
4771 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4772 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4773 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4774 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4775 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4776 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4777 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4778 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4782 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4783 if (!IsVertical(SourceRotation[k]))
4784 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4786 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4787 #ifdef __DML_VBA_DEBUG__
4788 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4791 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4792 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4793 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4794 if (BytePerPixelC[k] == 0) {
4795 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4796 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4798 if (!IsVertical(SourceRotation[k]))
4799 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4801 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4802 #ifdef __DML_VBA_DEBUG__
4803 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4805 DisplayPipeRequestDeliveryTimeChroma[k] =
4806 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4807 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4808 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4810 #ifdef __DML_VBA_DEBUG__
4811 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4812 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4813 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4814 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4815 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4816 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4817 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4818 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4822 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4823 unsigned int cursor_req_per_width;
4825 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4827 if (NumberOfCursors[k] > 0) {
4828 if (VRatio[k] <= 1) {
4829 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4830 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4832 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4833 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4835 if (VRatioPrefetchY[k] <= 1) {
4836 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4837 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4839 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4840 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4843 CursorRequestDeliveryTime[k] = 0;
4844 CursorRequestDeliveryTimePrefetch[k] = 0;
4846 #ifdef __DML_VBA_DEBUG__
4847 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4848 __func__, k, NumberOfCursors[k]);
4849 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4850 __func__, k, CursorRequestDeliveryTime[k]);
4851 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4852 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4855 } // CalculatePixelDeliveryTimes
4857 void dml32_CalculateMetaAndPTETimes(
4858 bool use_one_row_for_frame[],
4859 unsigned int NumberOfActiveSurfaces,
4861 unsigned int MetaChunkSize,
4862 unsigned int MinMetaChunkSizeBytes,
4863 unsigned int HTotal[],
4865 double VRatioChroma[],
4866 double DestinationLinesToRequestRowInVBlank[],
4867 double DestinationLinesToRequestRowInImmediateFlip[],
4869 double PixelClock[],
4870 unsigned int BytePerPixelY[],
4871 unsigned int BytePerPixelC[],
4872 enum dm_rotation_angle SourceRotation[],
4873 unsigned int dpte_row_height[],
4874 unsigned int dpte_row_height_chroma[],
4875 unsigned int meta_row_width[],
4876 unsigned int meta_row_width_chroma[],
4877 unsigned int meta_row_height[],
4878 unsigned int meta_row_height_chroma[],
4879 unsigned int meta_req_width[],
4880 unsigned int meta_req_width_chroma[],
4881 unsigned int meta_req_height[],
4882 unsigned int meta_req_height_chroma[],
4883 unsigned int dpte_group_bytes[],
4884 unsigned int PTERequestSizeY[],
4885 unsigned int PTERequestSizeC[],
4886 unsigned int PixelPTEReqWidthY[],
4887 unsigned int PixelPTEReqHeightY[],
4888 unsigned int PixelPTEReqWidthC[],
4889 unsigned int PixelPTEReqHeightC[],
4890 unsigned int dpte_row_width_luma_ub[],
4891 unsigned int dpte_row_width_chroma_ub[],
4894 double DST_Y_PER_PTE_ROW_NOM_L[],
4895 double DST_Y_PER_PTE_ROW_NOM_C[],
4896 double DST_Y_PER_META_ROW_NOM_L[],
4897 double DST_Y_PER_META_ROW_NOM_C[],
4898 double TimePerMetaChunkNominal[],
4899 double TimePerChromaMetaChunkNominal[],
4900 double TimePerMetaChunkVBlank[],
4901 double TimePerChromaMetaChunkVBlank[],
4902 double TimePerMetaChunkFlip[],
4903 double TimePerChromaMetaChunkFlip[],
4904 double time_per_pte_group_nom_luma[],
4905 double time_per_pte_group_vblank_luma[],
4906 double time_per_pte_group_flip_luma[],
4907 double time_per_pte_group_nom_chroma[],
4908 double time_per_pte_group_vblank_chroma[],
4909 double time_per_pte_group_flip_chroma[])
4911 unsigned int meta_chunk_width;
4912 unsigned int min_meta_chunk_width;
4913 unsigned int meta_chunk_per_row_int;
4914 unsigned int meta_row_remainder;
4915 unsigned int meta_chunk_threshold;
4916 unsigned int meta_chunks_per_row_ub;
4917 unsigned int meta_chunk_width_chroma;
4918 unsigned int min_meta_chunk_width_chroma;
4919 unsigned int meta_chunk_per_row_int_chroma;
4920 unsigned int meta_row_remainder_chroma;
4921 unsigned int meta_chunk_threshold_chroma;
4922 unsigned int meta_chunks_per_row_ub_chroma;
4923 unsigned int dpte_group_width_luma;
4924 unsigned int dpte_groups_per_row_luma_ub;
4925 unsigned int dpte_group_width_chroma;
4926 unsigned int dpte_groups_per_row_chroma_ub;
4929 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4930 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4931 if (BytePerPixelC[k] == 0)
4932 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4934 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4935 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4936 if (BytePerPixelC[k] == 0)
4937 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4939 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4942 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4943 if (DCCEnable[k] == true) {
4944 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4945 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4946 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4947 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4948 if (!IsVertical(SourceRotation[k]))
4949 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4951 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4953 if (meta_row_remainder <= meta_chunk_threshold)
4954 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4956 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4958 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4959 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4960 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4961 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4962 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4963 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4964 if (BytePerPixelC[k] == 0) {
4965 TimePerChromaMetaChunkNominal[k] = 0;
4966 TimePerChromaMetaChunkVBlank[k] = 0;
4967 TimePerChromaMetaChunkFlip[k] = 0;
4969 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4970 meta_row_height_chroma[k];
4971 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4972 meta_row_height_chroma[k];
4973 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4974 meta_chunk_width_chroma;
4975 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4976 if (!IsVertical(SourceRotation[k])) {
4977 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4978 meta_req_width_chroma[k];
4980 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4981 meta_req_height_chroma[k];
4983 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4984 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4986 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4988 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4989 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4990 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4991 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4992 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4993 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4996 TimePerMetaChunkNominal[k] = 0;
4997 TimePerMetaChunkVBlank[k] = 0;
4998 TimePerMetaChunkFlip[k] = 0;
4999 TimePerChromaMetaChunkNominal[k] = 0;
5000 TimePerChromaMetaChunkVBlank[k] = 0;
5001 TimePerChromaMetaChunkFlip[k] = 0;
5005 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5006 if (GPUVMEnable == true) {
5007 if (!IsVertical(SourceRotation[k])) {
5008 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5009 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5011 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5012 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5015 if (use_one_row_for_frame[k]) {
5016 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5017 (double) dpte_group_width_luma / 2.0, 1.0);
5019 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5020 (double) dpte_group_width_luma, 1.0);
5022 #ifdef __DML_VBA_DEBUG__
5023 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
5024 __func__, k, use_one_row_for_frame[k]);
5025 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
5026 __func__, k, dpte_group_bytes[k]);
5027 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
5028 __func__, k, PTERequestSizeY[k]);
5029 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
5030 __func__, k, PixelPTEReqWidthY[k]);
5031 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
5032 __func__, k, PixelPTEReqHeightY[k]);
5033 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
5034 __func__, k, dpte_row_width_luma_ub[k]);
5035 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
5036 __func__, k, dpte_group_width_luma);
5037 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
5038 __func__, k, dpte_groups_per_row_luma_ub);
5041 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5042 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5043 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5044 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5045 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5046 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5047 if (BytePerPixelC[k] == 0) {
5048 time_per_pte_group_nom_chroma[k] = 0;
5049 time_per_pte_group_vblank_chroma[k] = 0;
5050 time_per_pte_group_flip_chroma[k] = 0;
5052 if (!IsVertical(SourceRotation[k])) {
5053 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5054 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5056 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5057 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5060 if (use_one_row_for_frame[k]) {
5061 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5062 (double) dpte_group_width_chroma / 2.0, 1.0);
5064 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5065 (double) dpte_group_width_chroma, 1.0);
5067 #ifdef __DML_VBA_DEBUG__
5068 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
5069 __func__, k, dpte_row_width_chroma_ub[k]);
5070 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
5071 __func__, k, dpte_group_width_chroma);
5072 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
5073 __func__, k, dpte_groups_per_row_chroma_ub);
5075 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5076 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5077 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5078 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5079 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5080 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5083 time_per_pte_group_nom_luma[k] = 0;
5084 time_per_pte_group_vblank_luma[k] = 0;
5085 time_per_pte_group_flip_luma[k] = 0;
5086 time_per_pte_group_nom_chroma[k] = 0;
5087 time_per_pte_group_vblank_chroma[k] = 0;
5088 time_per_pte_group_flip_chroma[k] = 0;
5090 #ifdef __DML_VBA_DEBUG__
5091 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
5092 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5093 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
5094 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5095 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
5096 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5097 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
5098 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5099 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
5100 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5101 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
5102 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5103 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
5104 __func__, k, TimePerMetaChunkNominal[k]);
5105 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
5106 __func__, k, TimePerMetaChunkVBlank[k]);
5107 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
5108 __func__, k, TimePerMetaChunkFlip[k]);
5109 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
5110 __func__, k, TimePerChromaMetaChunkNominal[k]);
5111 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
5112 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5113 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
5114 __func__, k, TimePerChromaMetaChunkFlip[k]);
5115 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
5116 __func__, k, time_per_pte_group_nom_luma[k]);
5117 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
5118 __func__, k, time_per_pte_group_vblank_luma[k]);
5119 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
5120 __func__, k, time_per_pte_group_flip_luma[k]);
5121 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
5122 __func__, k, time_per_pte_group_nom_chroma[k]);
5123 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5124 __func__, k, time_per_pte_group_vblank_chroma[k]);
5125 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
5126 __func__, k, time_per_pte_group_flip_chroma[k]);
5129 } // CalculateMetaAndPTETimes
/*
 * dml32_CalculateVMGroupAndRequestTimes - split the VM request line budget of
 * each surface evenly across its VM groups and across its VM requests, for the
 * vblank case and for the immediate-flip case.
 *
 * All array arguments are indexed by surface, 0 .. NumberOfActiveSurfaces - 1.
 *
 * @NumberOfActiveSurfaces: number of entries processed in every array.
 * @GPUVMEnable: when false, all four outputs are set to 0 for every surface.
 * @GPUVMMaxPageTableLevels: selects which byte counts contribute (see below);
 *	a value greater than 2 additionally halves all four results.
 * @HTotal, @PixelClock: used as HTotal[k] / PixelClock[k] to scale the line
 *	budgets (presumably converting lines to time - units depend on the
 *	caller, confirm there).
 * @BytePerPixelC: a value > 0 adds the chroma ("_c") byte counts.
 * @DestinationLinesToRequestVMInVBlank,
 * @DestinationLinesToRequestVMInImmediateFlip: line budgets being divided.
 * @DCCEnable: per-surface DCC flag; selects dpde0 vs. meta_pte byte counts.
 * @dpte_row_width_luma_ub, @dpte_row_width_chroma_ub: not referenced by this
 *	function; kept so the signature stays unchanged for callers.
 * @vm_group_bytes: divisor used to turn byte counts into a group count.
 * @dpde0_bytes_per_frame_ub_l/_c, @meta_pte_bytes_per_frame_ub_l/_c: byte
 *	counts from which the group and request counts are derived.
 *
 * Outputs: @TimePerVMGroupVBlank, @TimePerVMGroupFlip,
 * @TimePerVMRequestVBlank, @TimePerVMRequestFlip.
 *
 * A surface is only computed when GPUVMEnable is set and either DCC is
 * enabled for it or more than one page-table level is in use; otherwise its
 * four outputs are 0.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double   DestinationLinesToRequestVMInVBlank[],
		double   DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double   PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int        vm_group_bytes[],
		unsigned int        dpde0_bytes_per_frame_ub_l[],
		unsigned int        dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double   TimePerVMGroupVBlank[],
		double   TimePerVMGroupFlip[],
		double   TimePerVMRequestVBlank[],
		double   TimePerVMRequestFlip[])
{
	unsigned int k;
	unsigned int num_group_per_lower_vm_stage;
	unsigned int num_req_per_lower_vm_stage;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			/*
			 * Group count: byte counts divided by vm_group_bytes[k] and
			 * passed through dml_ceil(). Which byte counts take part:
			 *   DCC off            -> dpde0 only
			 *   DCC on, 1 PT level -> meta_pte only
			 *   DCC on, >1 level   -> dpde0 + meta_pte plus a constant
			 *                         (2 with chroma, 1 without)
			 */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				} else {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					} else {
						num_group_per_lower_vm_stage = dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1.0);
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = 2 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1);
					} else {
						num_group_per_lower_vm_stage = 1 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) + dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1);
					}
				}
			}

			/*
			 * Request count: the same selection of byte counts, each
			 * divided by 64 with truncating integer division.
			 * NOTE(review): the result is 0 when every contributing
			 * byte count is below 64, which makes the request times
			 * below inf/NaN - confirm callers never reach that case.
			 */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
								meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
								meta_pte_bytes_per_frame_ub_l[k] / 64 +
								meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				}
			}

			/* Scale each line budget and share it evenly per group / per request. */
			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			/* With more than two page-table levels every result is halved. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}

		} else {
			/* Nothing to compute for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
/*
 * dml32_CalculateDCCConfiguration - choose the DCC request type for the luma
 * and chroma planes of one surface and report it as block sizes.
 *
 * For each plane the function decides between REQ_256Bytes,
 * REQ_128BytesContiguous and REQ_128BytesNonContiguous, based on whether the
 * estimated full-swath byte counts fit into the DET buffer
 * (nomDETInKByte * 1024) and on the per-plane "segment order" flags, then
 * writes:
 *
 *   request type               MaxUncompressed  MaxCompressed  Independent
 *   REQ_256Bytes                     256             256            0
 *   REQ_128BytesContiguous           256             128          128
 *   anything else                    256              64           64
 *
 * Finally the chroma outputs are forced to 0 when DCCEnabled != true or
 * BytePerPixelC == 0, and the luma outputs are forced to 0 when
 * DCCEnabled != true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced in the visible body.
 *
 * NOTE(review): this listing looks incomplete. DCCEnabled is used below but
 * does not appear in the visible parameter list, and several short lines
 * (braces, else, #endif, some assignments) are absent. Confirm against the
 * tracked file before relying on the notes marked NOTE(review) below.
 */
5285 void dml32_CalculateDCCConfiguration(
5287 bool DCCProgrammingAssumesScanDirectionUnknown,
5288 enum source_format_class SourcePixelFormat,
5289 unsigned int SurfaceWidthLuma,
5290 unsigned int SurfaceWidthChroma,
5291 unsigned int SurfaceHeightLuma,
5292 unsigned int SurfaceHeightChroma,
5293 unsigned int nomDETInKByte,
5294 unsigned int RequestHeight256ByteLuma,
5295 unsigned int RequestHeight256ByteChroma,
5296 enum dm_swizzle_mode TilingFormat,
5297 unsigned int BytePerPixelY,
5298 unsigned int BytePerPixelC,
5299 double BytePerPixelDETY,
5300 double BytePerPixelDETC,
5301 enum dm_rotation_angle SourceRotation,
/* The six pointer parameters below are the outputs of this function. */
5303 unsigned int *MaxUncompressedBlockLuma,
5304 unsigned int *MaxUncompressedBlockChroma,
5305 unsigned int *MaxCompressedBlockLuma,
5306 unsigned int *MaxCompressedBlockChroma,
5307 unsigned int *IndependentBlockLuma,
5308 unsigned int *IndependentBlockChroma)
/*
 * Enumerators of the local RequestType used for RequestLuma / RequestChroma.
 * NOTE(review): REQ_256Bytes is used further down but its enumerator line and
 * the enclosing typedef are not visible here.
 */
5312 REQ_128BytesNonContiguous,
5313 REQ_128BytesContiguous,
5317 RequestType RequestLuma;
5318 RequestType RequestChroma;
/* segment_order_*: 0/1 flags set from BytePerPixelY / BytePerPixelC == 2 below. */
5320 unsigned int segment_order_horz_contiguous_luma;
5321 unsigned int segment_order_horz_contiguous_chroma;
5322 unsigned int segment_order_vert_contiguous_luma;
5323 unsigned int segment_order_vert_contiguous_chroma;
/* req128_*: 0/1 flags, one per scan direction (horz/vert) and plane (l/c). */
5324 unsigned int req128_horz_wc_l;
5325 unsigned int req128_horz_wc_c;
5326 unsigned int req128_vert_wc_l;
5327 unsigned int req128_vert_wc_c;
5328 unsigned int MAS_vp_horz_limit;
5329 unsigned int MAS_vp_vert_limit;
5330 unsigned int max_vp_horz_width;
5331 unsigned int max_vp_vert_height;
5332 unsigned int eff_surf_width_l;
5333 unsigned int eff_surf_width_c;
5334 unsigned int eff_surf_height_l;
5335 unsigned int eff_surf_height_c;
5336 unsigned int full_swath_bytes_horz_wc_l;
5337 unsigned int full_swath_bytes_horz_wc_c;
5338 unsigned int full_swath_bytes_vert_wc_l;
5339 unsigned int full_swath_bytes_vert_wc_c;
/* Buffer size every "fits?" comparison below is made against. */
5340 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5342 unsigned int yuv420;
5343 unsigned int horz_div_l;
5344 unsigned int horz_div_c;
5345 unsigned int vert_div_l;
5346 unsigned int vert_div_c;
5348 unsigned int swath_buf_size;
5349 double detile_buf_vp_horz_limit;
5350 double detile_buf_vp_vert_limit;
/* yuv420 is 1 for the three dm_420_* formats, 0 otherwise. */
5352 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5353 SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * NOTE(review): horz_div_l, horz_div_c, vert_div_l and vert_div_c are read
 * below, but neither their initial assignments nor the bodies of the next two
 * ifs are visible in this listing - confirm the values in the tracked file.
 */
5359 if (BytePerPixelY == 1)
5361 if (BytePerPixelC == 1)
/*
 * Viewport limits imposed by the detile buffer. Without a chroma plane
 * (BytePerPixelC == 0) only luma terms are used and 2 * 256 is subtracted from
 * half the DET size; with chroma, 2 * 2 * 256 is subtracted and luma and
 * chroma terms are summed, the chroma terms divided by (1 + yuv420).
 */
5364 if (BytePerPixelC == 0) {
5365 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5366 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5367 BytePerPixelY / (1 + horz_div_l));
/* NOTE(review): the end of the next expression is not visible in this listing. */
5368 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5371 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5372 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5373 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5374 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5375 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5376 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5377 (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 scales both limits by 1.5. */
5380 if (SourcePixelFormat == dm_420_10) {
5381 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5382 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* presumably dml_floor(x, 16) rounds down to a multiple of 16 - helper not visible here */
5385 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5386 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/*
 * Fixed limits: 3840 for dm_rgbe_alpha, otherwise 6144 (vertical: 3072 when
 * BytePerPixelY == 8). The effective limit is the smaller of the fixed limit
 * and the detile-buffer limit, and the surface size is clamped to it.
 */
5388 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5389 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5390 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5391 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5392 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5393 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5394 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5395 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Byte estimate of one full swath per plane, for each scan direction. */
5397 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5398 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5399 if (BytePerPixelC > 0) {
5400 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5401 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5403 full_swath_bytes_horz_wc_c = 0;
5404 full_swath_bytes_vert_wc_c = 0;
/* dm_420_10: scale every estimate by 2/3 (presumably rounded up to a multiple of 256 by dml_ceil). */
5407 if (SourcePixelFormat == dm_420_10) {
5408 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5409 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5410 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5411 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
/*
 * Horizontal scan: pick the req128 flags.
 *   2*luma + 2*chroma fits              -> l = 0, c = 0
 *   luma <  1.5*chroma, 2*luma + chroma fits -> l = 0, c = 1
 *   luma >= 1.5*chroma, luma + 2*chroma fits -> l = 1, c = 0
 *   otherwise                           -> l = 1, c = 1
 */
5414 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5415 req128_horz_wc_l = 0;
5416 req128_horz_wc_c = 0;
5417 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5418 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5419 req128_horz_wc_l = 0;
5420 req128_horz_wc_c = 1;
5421 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5422 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5423 req128_horz_wc_l = 1;
5424 req128_horz_wc_c = 0;
5426 req128_horz_wc_l = 1;
5427 req128_horz_wc_c = 1;
/* Vertical scan: same decision ladder on the vertical estimates. */
5430 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5431 req128_vert_wc_l = 0;
5432 req128_vert_wc_c = 0;
5433 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5434 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5435 req128_vert_wc_l = 0;
5436 req128_vert_wc_c = 1;
5437 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5438 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5439 req128_vert_wc_l = 1;
5440 req128_vert_wc_c = 0;
5442 req128_vert_wc_l = 1;
5443 req128_vert_wc_c = 1;
/* Segment order flags: horz = 0 / vert = 1 when the plane has 2 bytes per pixel, the reverse otherwise. */
5446 if (BytePerPixelY == 2) {
5447 segment_order_horz_contiguous_luma = 0;
5448 segment_order_vert_contiguous_luma = 1;
5450 segment_order_horz_contiguous_luma = 1;
5451 segment_order_vert_contiguous_luma = 0;
5454 if (BytePerPixelC == 2) {
5455 segment_order_horz_contiguous_chroma = 0;
5456 segment_order_vert_contiguous_chroma = 1;
5458 segment_order_horz_contiguous_chroma = 1;
5459 segment_order_vert_contiguous_chroma = 0;
5461 #ifdef __DML_VBA_DEBUG__
5462 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5463 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5464 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5465 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5466 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5467 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5468 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5469 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5470 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5471 __func__, segment_order_horz_contiguous_chroma);
/*
 * Request type per plane. Three cases:
 *  - scan direction unknown: 256-byte requests only if neither direction needs
 *    req128; non-contiguous if any direction needing req128 has segment order 0;
 *  - SourceRotation not vertical: only the horizontal flags are consulted;
 *  - otherwise: only the vertical flags are consulted.
 */
5474 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5475 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5476 RequestLuma = REQ_256Bytes;
5477 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5478 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5479 RequestLuma = REQ_128BytesNonContiguous;
5481 RequestLuma = REQ_128BytesContiguous;
5483 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5484 RequestChroma = REQ_256Bytes;
5485 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5486 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5487 RequestChroma = REQ_128BytesNonContiguous;
5489 RequestChroma = REQ_128BytesContiguous;
5491 } else if (!IsVertical(SourceRotation)) {
5492 if (req128_horz_wc_l == 0)
5493 RequestLuma = REQ_256Bytes;
5494 else if (segment_order_horz_contiguous_luma == 0)
5495 RequestLuma = REQ_128BytesNonContiguous;
5497 RequestLuma = REQ_128BytesContiguous;
5499 if (req128_horz_wc_c == 0)
5500 RequestChroma = REQ_256Bytes;
5501 else if (segment_order_horz_contiguous_chroma == 0)
5502 RequestChroma = REQ_128BytesNonContiguous;
5504 RequestChroma = REQ_128BytesContiguous;
5507 if (req128_vert_wc_l == 0)
5508 RequestLuma = REQ_256Bytes;
5509 else if (segment_order_vert_contiguous_luma == 0)
5510 RequestLuma = REQ_128BytesNonContiguous;
5512 RequestLuma = REQ_128BytesContiguous;
5514 if (req128_vert_wc_c == 0)
5515 RequestChroma = REQ_256Bytes;
5516 else if (segment_order_vert_contiguous_chroma == 0)
5517 RequestChroma = REQ_128BytesNonContiguous;
5519 RequestChroma = REQ_128BytesContiguous;
/* Translate the luma request type into the three luma outputs. */
5522 if (RequestLuma == REQ_256Bytes) {
5523 *MaxUncompressedBlockLuma = 256;
5524 *MaxCompressedBlockLuma = 256;
5525 *IndependentBlockLuma = 0;
5526 } else if (RequestLuma == REQ_128BytesContiguous) {
5527 *MaxUncompressedBlockLuma = 256;
5528 *MaxCompressedBlockLuma = 128;
5529 *IndependentBlockLuma = 128;
5531 *MaxUncompressedBlockLuma = 256;
5532 *MaxCompressedBlockLuma = 64;
5533 *IndependentBlockLuma = 64;
/* Same translation for chroma. */
5536 if (RequestChroma == REQ_256Bytes) {
5537 *MaxUncompressedBlockChroma = 256;
5538 *MaxCompressedBlockChroma = 256;
5539 *IndependentBlockChroma = 0;
5540 } else if (RequestChroma == REQ_128BytesContiguous) {
5541 *MaxUncompressedBlockChroma = 256;
5542 *MaxCompressedBlockChroma = 128;
5543 *IndependentBlockChroma = 128;
5545 *MaxUncompressedBlockChroma = 256;
5546 *MaxCompressedBlockChroma = 64;
5547 *IndependentBlockChroma = 64;
/* Overrides: no DCC or no chroma plane -> chroma outputs 0; no DCC -> luma outputs 0. */
5550 if (DCCEnabled != true || BytePerPixelC == 0) {
5551 *MaxUncompressedBlockChroma = 0;
5552 *MaxCompressedBlockChroma = 0;
5553 *IndependentBlockChroma = 0;
5556 if (DCCEnabled != true) {
5557 *MaxUncompressedBlockLuma = 0;
5558 *MaxCompressedBlockLuma = 0;
5559 *IndependentBlockLuma = 0;
5562 #ifdef __DML_VBA_DEBUG__
5563 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5564 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5565 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5566 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5567 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5568 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5571 } // CalculateDCCConfiguration
5573 void dml32_CalculateStutterEfficiency(
5574 unsigned int CompressedBufferSizeInkByte,
5575 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5576 bool UnboundedRequestEnabled,
5577 unsigned int MetaFIFOSizeInKEntries,
5578 unsigned int ZeroSizeBufferEntries,
5579 unsigned int PixelChunkSizeInKByte,
5580 unsigned int NumberOfActiveSurfaces,
5581 unsigned int ROBBufferSizeInKByte,
5582 double TotalDataReadBandwidth,
5585 unsigned int CompbufReservedSpace64B,
5586 unsigned int CompbufReservedSpaceZs,
5588 double SRExitZ8Time,
5589 bool SynchronizeTimingsFinal,
5590 unsigned int BlendingAndTiming[],
5591 double StutterEnterPlusExitWatermark,
5592 double Z8StutterEnterPlusExitWatermark,
5593 bool ProgressiveToInterlaceUnitInOPP,
5595 double MinTTUVBlank[],
5596 unsigned int DPPPerSurface[],
5597 unsigned int DETBufferSizeY[],
5598 unsigned int BytePerPixelY[],
5599 double BytePerPixelDETY[],
5600 double SwathWidthY[],
5601 unsigned int SwathHeightY[],
5602 unsigned int SwathHeightC[],
5603 double NetDCCRateLuma[],
5604 double NetDCCRateChroma[],
5605 double DCCFractionOfZeroSizeRequestsLuma[],
5606 double DCCFractionOfZeroSizeRequestsChroma[],
5607 unsigned int HTotal[],
5608 unsigned int VTotal[],
5609 double PixelClock[],
5611 enum dm_rotation_angle SourceRotation[],
5612 unsigned int BlockHeight256BytesY[],
5613 unsigned int BlockWidth256BytesY[],
5614 unsigned int BlockHeight256BytesC[],
5615 unsigned int BlockWidth256BytesC[],
5616 unsigned int DCCYMaxUncompressedBlock[],
5617 unsigned int DCCCMaxUncompressedBlock[],
5618 unsigned int VActive[],
5620 bool WritebackEnable[],
5621 double ReadBandwidthSurfaceLuma[],
5622 double ReadBandwidthSurfaceChroma[],
5623 double meta_row_bw[],
5624 double dpte_row_bw[],
5627 double *StutterEfficiencyNotIncludingVBlank,
5628 double *StutterEfficiency,
5629 unsigned int *NumberOfStutterBurstsPerFrame,
5630 double *Z8StutterEfficiencyNotIncludingVBlank,
5631 double *Z8StutterEfficiency,
5632 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5633 double *StutterPeriod,
5634 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5637 bool FoundCriticalSurface = false;
5638 unsigned int SwathSizeCriticalSurface = 0;
5639 unsigned int LastChunkOfSwathSize;
5640 unsigned int MissingPartOfLastSwathOfDETSize;
5641 double LastZ8StutterPeriod = 0.0;
5642 double LastStutterPeriod = 0.0;
5643 unsigned int TotalNumberOfActiveOTG = 0;
5644 double doublePixelClock;
5645 unsigned int doubleHTotal;
5646 unsigned int doubleVTotal;
5647 bool SameTiming = true;
5648 double DETBufferingTimeY;
5649 double SwathWidthYCriticalSurface = 0.0;
5650 double SwathHeightYCriticalSurface = 0.0;
5651 double VActiveTimeCriticalSurface = 0.0;
5652 double FrameTimeCriticalSurface = 0.0;
5653 unsigned int BytePerPixelYCriticalSurface = 0;
5654 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5655 unsigned int DETBufferSizeYCriticalSurface = 0;
5656 double MinTTUVBlankCriticalSurface = 0.0;
5657 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5658 bool doublePlaneCriticalSurface = 0;
5659 bool doublePipeCriticalSurface = 0;
5660 double TotalCompressedReadBandwidth;
5661 double TotalRowReadBandwidth;
5662 double AverageDCCCompressionRate;
5663 double EffectiveCompressedBufferSize;
5664 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5665 double StutterBurstTime;
5666 unsigned int TotalActiveWriteback;
5668 double LinesInDETYRoundedDownToSwath;
5669 double MaximumEffectiveCompressionLuma;
5670 double MaximumEffectiveCompressionChroma;
5671 double TotalZeroSizeRequestReadBandwidth;
5672 double TotalZeroSizeCompressedReadBandwidth;
5673 double AverageDCCZeroSizeFraction;
5674 double AverageZeroSizeCompressionRate;
5677 TotalZeroSizeRequestReadBandwidth = 0;
5678 TotalZeroSizeCompressedReadBandwidth = 0;
5679 TotalRowReadBandwidth = 0;
5680 TotalCompressedReadBandwidth = 0;
5682 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5683 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5684 if (DCCEnable[k] == true) {
5685 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5686 || (!IsVertical(SourceRotation[k])
5687 && BlockHeight256BytesY[k] > SwathHeightY[k])
5688 || DCCYMaxUncompressedBlock[k] < 256) {
5689 MaximumEffectiveCompressionLuma = 2;
5691 MaximumEffectiveCompressionLuma = 4;
5693 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5694 + ReadBandwidthSurfaceLuma[k]
5695 / dml_min(NetDCCRateLuma[k],
5696 MaximumEffectiveCompressionLuma);
5697 #ifdef __DML_VBA_DEBUG__
5698 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5699 __func__, k, ReadBandwidthSurfaceLuma[k]);
5700 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5701 __func__, k, NetDCCRateLuma[k]);
5702 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5703 __func__, k, MaximumEffectiveCompressionLuma);
5705 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5706 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5707 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5708 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5709 / MaximumEffectiveCompressionLuma;
5711 if (ReadBandwidthSurfaceChroma[k] > 0) {
5712 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5713 || (!IsVertical(SourceRotation[k])
5714 && BlockHeight256BytesC[k] > SwathHeightC[k])
5715 || DCCCMaxUncompressedBlock[k] < 256) {
5716 MaximumEffectiveCompressionChroma = 2;
5718 MaximumEffectiveCompressionChroma = 4;
5720 TotalCompressedReadBandwidth =
5721 TotalCompressedReadBandwidth
5722 + ReadBandwidthSurfaceChroma[k]
5723 / dml_min(NetDCCRateChroma[k],
5724 MaximumEffectiveCompressionChroma);
5725 #ifdef __DML_VBA_DEBUG__
5726 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5727 __func__, k, ReadBandwidthSurfaceChroma[k]);
5728 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5729 __func__, k, NetDCCRateChroma[k]);
5730 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5731 __func__, k, MaximumEffectiveCompressionChroma);
5733 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5734 + ReadBandwidthSurfaceChroma[k]
5735 * DCCFractionOfZeroSizeRequestsChroma[k];
5736 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5737 + ReadBandwidthSurfaceChroma[k]
5738 * DCCFractionOfZeroSizeRequestsChroma[k]
5739 / MaximumEffectiveCompressionChroma;
5742 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5743 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5745 TotalRowReadBandwidth = TotalRowReadBandwidth
5746 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5750 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5751 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5753 #ifdef __DML_VBA_DEBUG__
5754 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5755 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5756 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5757 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5758 __func__, TotalZeroSizeCompressedReadBandwidth);
5759 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5760 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5761 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5762 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5763 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5764 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5765 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5767 if (AverageDCCZeroSizeFraction == 1) {
5768 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5769 / TotalZeroSizeCompressedReadBandwidth;
5770 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5771 * AverageZeroSizeCompressionRate
5772 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5773 * AverageZeroSizeCompressionRate;
5774 } else if (AverageDCCZeroSizeFraction > 0) {
5775 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5776 / TotalZeroSizeCompressedReadBandwidth;
5777 EffectiveCompressedBufferSize = dml_min(
5778 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5779 (double) MetaFIFOSizeInKEntries * 1024 * 64
5780 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5781 + 1 / AverageDCCCompressionRate))
5782 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5783 * AverageDCCCompressionRate,
5784 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5785 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5787 #ifdef __DML_VBA_DEBUG__
5788 dml_print("DML::%s: min 1 = %f\n", __func__,
5789 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5790 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5791 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5792 AverageDCCCompressionRate));
5793 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5794 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5795 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5796 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5799 EffectiveCompressedBufferSize = dml_min(
5800 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5801 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5802 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5803 * AverageDCCCompressionRate;
5805 #ifdef __DML_VBA_DEBUG__
5806 dml_print("DML::%s: min 1 = %f\n", __func__,
5807 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5808 dml_print("DML::%s: min 2 = %f\n", __func__,
5809 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5813 #ifdef __DML_VBA_DEBUG__
5814 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5815 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5816 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5821 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5822 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5823 LinesInDETY = ((double) DETBufferSizeY[k]
5824 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5825 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5826 / BytePerPixelDETY[k] / SwathWidthY[k];
5827 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5828 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5830 #ifdef __DML_VBA_DEBUG__
5831 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5832 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5833 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5834 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5835 __func__, k, ReadBandwidthSurfaceLuma[k]);
5836 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5837 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5838 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5839 __func__, k, LinesInDETYRoundedDownToSwath);
5840 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5841 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5842 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5843 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5844 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5847 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5848 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5850 FoundCriticalSurface = true;
5851 *StutterPeriod = DETBufferingTimeY;
5852 FrameTimeCriticalSurface = (
5854 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5855 * (double) HTotal[k] / PixelClock[k];
5856 VActiveTimeCriticalSurface = (
5858 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5859 * (double) HTotal[k] / PixelClock[k];
5860 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5861 SwathWidthYCriticalSurface = SwathWidthY[k];
5862 SwathHeightYCriticalSurface = SwathHeightY[k];
5863 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5864 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5865 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5866 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5867 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5868 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5869 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5871 #ifdef __DML_VBA_DEBUG__
5872 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5873 __func__, k, FoundCriticalSurface);
5874 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5875 __func__, k, *StutterPeriod);
5876 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5877 __func__, k, MinTTUVBlankCriticalSurface);
5878 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5879 __func__, k, FrameTimeCriticalSurface);
5880 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5881 __func__, k, VActiveTimeCriticalSurface);
5882 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5883 __func__, k, BytePerPixelYCriticalSurface);
5884 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5885 __func__, k, SwathWidthYCriticalSurface);
5886 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5887 __func__, k, SwathHeightYCriticalSurface);
5888 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5889 __func__, k, BlockWidth256BytesYCriticalSurface);
5890 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5891 __func__, k, doublePlaneCriticalSurface);
5892 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5893 __func__, k, doublePipeCriticalSurface);
5894 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5895 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5901 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5902 EffectiveCompressedBufferSize);
5903 #ifdef __DML_VBA_DEBUG__
5904 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5905 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5906 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5907 __func__, *StutterPeriod * TotalDataReadBandwidth);
5908 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5909 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5910 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5911 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5912 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5913 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5914 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5917 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5919 + (*StutterPeriod * TotalDataReadBandwidth
5920 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5921 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5922 #ifdef __DML_VBA_DEBUG__
5923 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5924 AverageDCCCompressionRate / ReturnBW);
5925 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5926 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5927 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5928 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5929 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5930 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5932 StutterBurstTime = dml_max(StutterBurstTime,
5933 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5934 * SwathWidthYCriticalSurface / ReturnBW);
5936 #ifdef __DML_VBA_DEBUG__
5937 dml_print("DML::%s: Time to finish residue swath=%f\n",
5939 LinesToFinishSwathTransferStutterCriticalSurface *
5940 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5943 TotalActiveWriteback = 0;
5944 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5945 if (WritebackEnable[k])
5946 TotalActiveWriteback = TotalActiveWriteback + 1;
5949 if (TotalActiveWriteback == 0) {
5950 #ifdef __DML_VBA_DEBUG__
5951 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5952 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5953 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5954 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5956 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5957 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5958 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5959 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5960 *NumberOfStutterBurstsPerFrame = (
5961 *StutterEfficiencyNotIncludingVBlank > 0 ?
5962 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5963 *Z8NumberOfStutterBurstsPerFrame = (
5964 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5965 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5967 *StutterEfficiencyNotIncludingVBlank = 0.;
5968 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5969 *NumberOfStutterBurstsPerFrame = 0;
5970 *Z8NumberOfStutterBurstsPerFrame = 0;
5972 #ifdef __DML_VBA_DEBUG__
5973 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5974 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5975 __func__, *StutterEfficiencyNotIncludingVBlank);
5976 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5977 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5978 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5979 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5982 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5983 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5984 if (BlendingAndTiming[k] == k) {
5985 if (TotalNumberOfActiveOTG == 0) {
5986 doublePixelClock = PixelClock[k];
5987 doubleHTotal = HTotal[k];
5988 doubleVTotal = VTotal[k];
5989 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5990 || doubleVTotal != VTotal[k]) {
5993 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5998 if (*StutterEfficiencyNotIncludingVBlank > 0) {
5999 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6001 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6002 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6003 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6004 + StutterBurstTime * VActiveTimeCriticalSurface
6005 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6007 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6010 *StutterEfficiency = 0;
6013 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6014 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6015 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6016 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6017 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6018 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6019 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6021 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6024 *Z8StutterEfficiency = 0.;
6027 #ifdef __DML_VBA_DEBUG__
6028 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6029 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6030 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6031 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6032 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6033 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6034 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6035 __func__, *StutterEfficiencyNotIncludingVBlank);
6036 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6039 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6040 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6041 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6042 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6043 - DETBufferSizeYCriticalSurface;
6045 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6046 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6047 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6048 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6050 #ifdef __DML_VBA_DEBUG__
6051 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6052 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6053 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6054 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6056 } // CalculateStutterEfficiency
/*
 * dml32_CalculateMaxDETAndMinCompressedBufferSize
 *
 * Derives three buffer sizes (all in KByte) from the return buffer and ROB
 * sizes.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte - configured return buffer size
 *   ROBBufferSizeInKByte          - ROB buffer size
 *   MaxNumDPP                     - divisor used to derive the nominal DET size
 *   nomDETInKByteOverrideEnable   - when true, *nomDETInKByte is replaced by
 *                                   nomDETInKByteOverrideValue
 *   nomDETInKByteOverrideValue    - value used when the override is enabled
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil() of 4/5 of the sum of the two
 *                                     buffer sizes, with granularity 64
 *   *nomDETInKByte                  - dml_floor() of MaxTotalDET / MaxNumDPP
 *                                     with granularity 64, or the override
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte
 *
 * NOTE(review): MaxNumDPP == 0 is not guarded; the division is carried out
 * in double precision - confirm callers never pass 0.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	*MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte +
			(double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);

	/*
	 * NOTE(review): unsigned subtraction - this wraps around if
	 * *MaxTotalDETInKByte exceeds ConfigReturnBufferSizeInKByte; confirm
	 * the configured sizes rule that out.
	 */
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override replaces only the nominal DET size; the other outputs stand. */
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
	}
} // CalculateMaxDETAndMinCompressedBufferSize
/*
 * dml32_CalculateVActiveBandwithSupport
 *
 * Sums, over the first NumberOfActiveSurfaces entries, the luma, chroma and
 * cursor read bandwidths (each scaled by its urgent burst factor) plus the
 * meta-row and dpte-row bandwidths (each scaled by NumberOfDPP[k]), and
 * compares the total against ReturnBW.
 *
 * All array parameters are indexed by surface and must hold at least
 * NumberOfActiveSurfaces elements.
 *
 * Returns true when the summed bandwidth does not exceed ReturnBW and no
 * surface has its NotUrgentLatencyHiding flag set; false otherwise.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int k;
	bool NotEnoughUrgentLatencyHiding = false;
	bool CalculateVActiveBandwithSupport_val = false;
	double VActiveBandwith = 0;

	/* A single flagged surface is enough to fail the check. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			NotEnoughUrgentLatencyHiding = true;
			break;
		}
	}

	/* Terms are added strictly left to right to keep the rounding unchanged. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		VActiveBandwith = VActiveBandwith
				+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
				+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
				+ cursor_bw[k] * UrgentBurstFactorCursor[k]
				+ NumberOfDPP[k] * meta_row_bandwidth[k]
				+ NumberOfDPP[k] * dpte_row_bandwidth[k];
	}

	CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding);
	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val);
#endif
	return CalculateVActiveBandwithSupport_val;
}
/*
 * dml32_CalculatePrefetchBandwithSupport
 *
 * For each of the first NumberOfActiveSurfaces surfaces, takes the largest
 * (via dml_max3) of three candidate bandwidths and accumulates it:
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. luma + chroma + cursor read bandwidth (each scaled by its urgent
 *      burst factor) plus NumberOfDPP[k] * (meta row + dpte row bandwidth)
 *   3. NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by
 *      its "Pre" burst factor) plus the scaled pre-cursor bandwidth
 *
 * All array parameters are indexed by surface and must hold at least
 * NumberOfActiveSurfaces elements.
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated total
 *   *PrefetchBandwidthSupport  - true when the total does not exceed ReturnBW
 *                                and no surface has NotUrgentLatencyHiding set
 *   *FractionOfUrgentBandwidth - total divided by ReturnBW (ReturnBW == 0 is
 *                                not guarded against)
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double *PrefetchBandwidth,
		double *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int k;
	bool NotEnoughUrgentLatencyHiding = false;

	/* A single flagged surface is enough to fail the check. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (NotUrgentLatencyHiding[k]) {
			NotEnoughUrgentLatencyHiding = true;
			break;
		}
	}

	*PrefetchBandwidth = 0;
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Term order inside each candidate is kept as-is to preserve rounding. */
		*PrefetchBandwidth = *PrefetchBandwidth + dml_max3(
				NumberOfDPP[k] * prefetch_vmrow_bw[k],
				ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
					+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
					+ cursor_bw[k] * UrgentBurstFactorCursor[k]
					+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
					+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
	}

	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
/*
 * dml32_CalculateBandwidthAvailableForImmediateFlip
 *
 * Starts from ReturnBW and, for each of the first NumberOfActiveSurfaces
 * surfaces, subtracts the larger (via dml_max) of:
 *   - luma + chroma + cursor read bandwidth, each scaled by its urgent
 *     burst factor
 *   - NumberOfDPP[k] * (luma + chroma prefetch bandwidth, each scaled by its
 *     "Pre" burst factor) plus the scaled pre-cursor bandwidth
 *
 * All array parameters are indexed by surface and must hold at least
 * NumberOfActiveSurfaces elements.
 *
 * Returns the remaining bandwidth. The result is not clamped, so it is
 * negative when the per-surface demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int k;
	double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Term order inside each candidate is kept as-is to preserve rounding. */
		CalculateBandwidthAvailableForImmediateFlip_val =
				CalculateBandwidthAvailableForImmediateFlip_val - dml_max(
				ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
					+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
					+ cursor_bw[k] * UrgentBurstFactorCursor[k],
				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
					+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
	}

	return CalculateBandwidthAvailableForImmediateFlip_val;
}
6205 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6207 enum immediate_flip_requirement ImmediateFlipRequirement[],
6208 double final_flip_bw[],
6209 double ReadBandwidthLuma[],
6210 double ReadBandwidthChroma[],
6211 double PrefetchBandwidthLuma[],
6212 double PrefetchBandwidthChroma[],
6214 double meta_row_bandwidth[],
6215 double dpte_row_bandwidth[],
6216 double cursor_bw_pre[],
6217 double prefetch_vmrow_bw[],
6218 unsigned int NumberOfDPP[],
6219 double UrgentBurstFactorLuma[],
6220 double UrgentBurstFactorChroma[],
6221 double UrgentBurstFactorCursor[],
6222 double UrgentBurstFactorLumaPre[],
6223 double UrgentBurstFactorChromaPre[],
6224 double UrgentBurstFactorCursorPre[],
6227 double *TotalBandwidth,
6228 double *FractionOfUrgentBandwidth,
6229 bool *ImmediateFlipBandwidthSupport)
6232 *TotalBandwidth = 0;
6233 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6234 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6235 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6236 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6237 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6239 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6240 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6241 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6244 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6245 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;