Merge tag 'amd-drm-next-6.1-2022-09-08' of https://gitlab.freedesktop.org/agd5f/linux...
[linux-2.6-block.git] / drivers / gpu / drm / amd / display / dc / dml / dcn32 / display_mode_vba_util_32.c
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 unsigned int dml32_dscceComputeDelay(
31                 unsigned int bpc,
32                 double BPP,
33                 unsigned int sliceWidth,
34                 unsigned int numSlices,
35                 enum output_format_class pixelFormat,
36                 enum output_encoder_class Output)
37 {
38         // valid bpc         = source bits per component in the set of {8, 10, 12}
39         // valid bpp         = increments of 1/16 of a bit
40         //                    min = 6/7/8 in N420/N422/444, respectively
41         //                    max = such that compression is 1:1
42         //valid sliceWidth  = number of pixels per slice line,
43         //      must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44         //valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
45         //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
46
47         // fixed value
48         unsigned int rcModelSize = 8192;
49
50         // N422/N420 operate at 2 pixels per clock
51         unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
52         Delay, pixels;
53
54         if (pixelFormat == dm_420)
55                 pixelsPerClock = 2;
56         else if (pixelFormat == dm_n422)
57                 pixelsPerClock = 2;
58         // #all other modes operate at 1 pixel per clock
59         else
60                 pixelsPerClock = 1;
61
62         //initial transmit delay as per PPS
63         initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
64
65         //compute ssm delay
66         if (bpc == 8)
67                 D = 81;
68         else if (bpc == 10)
69                 D = 89;
70         else
71                 D = 113;
72
73         //divide by pixel per cycle to compute slice width as seen by DSC
74         w = sliceWidth / pixelsPerClock;
75
76         //422 mode has an additional cycle of delay
77         if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
78                 s = 0;
79         else
80                 s = 1;
81
82         //main calculation for the dscce
83         ix = initalXmitDelay + 45;
84         wx = (w + 2) / 3;
85         p = 3 * wx - w;
86         l0 = ix / w;
87         a = ix + p * l0;
88         ax = (a + 2) / 3 + D + 6 + 1;
89         L = (ax + wx - 1) / wx;
90         if ((ix % w) == 0 && p != 0)
91                 lstall = 1;
92         else
93                 lstall = 0;
94         Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
95
96         //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97         pixels = Delay * 3 * pixelsPerClock;
98
99 #ifdef __DML_VBA_DEBUG__
100         dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101         dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102         dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103         dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104         dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105         dml_print("DML::%s: Output: %d\n", __func__, Output);
106         dml_print("DML::%s: pixels: %d\n", __func__, pixels);
107 #endif
108
109         return pixels;
110 }
111
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
113 {
114         unsigned int Delay = 0;
115
116         if (pixelFormat == dm_420) {
117                 //   sfr
118                 Delay = Delay + 2;
119                 //   dsccif
120                 Delay = Delay + 0;
121                 //   dscc - input deserializer
122                 Delay = Delay + 3;
123                 //   dscc gets pixels every other cycle
124                 Delay = Delay + 2;
125                 //   dscc - input cdc fifo
126                 Delay = Delay + 12;
127                 //   dscc gets pixels every other cycle
128                 Delay = Delay + 13;
129                 //   dscc - cdc uncertainty
130                 Delay = Delay + 2;
131                 //   dscc - output cdc fifo
132                 Delay = Delay + 7;
133                 //   dscc gets pixels every other cycle
134                 Delay = Delay + 3;
135                 //   dscc - cdc uncertainty
136                 Delay = Delay + 2;
137                 //   dscc - output serializer
138                 Delay = Delay + 1;
139                 //   sft
140                 Delay = Delay + 1;
141         } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
142                 //   sfr
143                 Delay = Delay + 2;
144                 //   dsccif
145                 Delay = Delay + 1;
146                 //   dscc - input deserializer
147                 Delay = Delay + 5;
148                 //  dscc - input cdc fifo
149                 Delay = Delay + 25;
150                 //   dscc - cdc uncertainty
151                 Delay = Delay + 2;
152                 //   dscc - output cdc fifo
153                 Delay = Delay + 10;
154                 //   dscc - cdc uncertainty
155                 Delay = Delay + 2;
156                 //   dscc - output serializer
157                 Delay = Delay + 1;
158                 //   sft
159                 Delay = Delay + 1;
160         } else {
161                 //   sfr
162                 Delay = Delay + 2;
163                 //   dsccif
164                 Delay = Delay + 0;
165                 //   dscc - input deserializer
166                 Delay = Delay + 3;
167                 //   dscc - input cdc fifo
168                 Delay = Delay + 12;
169                 //   dscc - cdc uncertainty
170                 Delay = Delay + 2;
171                 //   dscc - output cdc fifo
172                 Delay = Delay + 7;
173                 //   dscc - output serializer
174                 Delay = Delay + 1;
175                 //   dscc - cdc uncertainty
176                 Delay = Delay + 2;
177                 //   sft
178                 Delay = Delay + 1;
179         }
180
181         return Delay;
182 }
183
184
185 bool IsVertical(enum dm_rotation_angle Scan)
186 {
187         bool is_vert = false;
188
189         if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
190                 is_vert = true;
191         else
192                 is_vert = false;
193         return is_vert;
194 }
195
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
197                 double HRatio,
198                 double HRatioChroma,
199                 double VRatio,
200                 double VRatioChroma,
201                 double MaxDCHUBToPSCLThroughput,
202                 double MaxPSCLToLBThroughput,
203                 double PixelClock,
204                 enum source_format_class SourcePixelFormat,
205                 unsigned int HTaps,
206                 unsigned int HTapsChroma,
207                 unsigned int VTaps,
208                 unsigned int VTapsChroma,
209
210                 /* output */
211                 double *PSCL_THROUGHPUT,
212                 double *PSCL_THROUGHPUT_CHROMA,
213                 double *DPPCLKUsingSingleDPP)
214 {
215         double DPPCLKUsingSingleDPPLuma;
216         double DPPCLKUsingSingleDPPChroma;
217
218         if (HRatio > 1) {
219                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220                                 dml_ceil((double) HTaps / 6.0, 1.0));
221         } else {
222                 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
223         }
224
225         DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226                         *PSCL_THROUGHPUT, 1);
227
228         if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229                 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
230
231         if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232                         SourcePixelFormat != dm_rgbe_alpha)) {
233                 *PSCL_THROUGHPUT_CHROMA = 0;
234                 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
235         } else {
236                 if (HRatioChroma > 1) {
237                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238                                         HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
239                 } else {
240                         *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
241                 }
242                 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243                                 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244                 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245                         DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
246                 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
247         }
248 }
249
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251                 enum source_format_class SourcePixelFormat,
252                 enum dm_swizzle_mode SurfaceTiling,
253
254                 /* Output */
255                 unsigned int *BytePerPixelY,
256                 unsigned int *BytePerPixelC,
257                 double  *BytePerPixelDETY,
258                 double  *BytePerPixelDETC,
259                 unsigned int *BlockHeight256BytesY,
260                 unsigned int *BlockHeight256BytesC,
261                 unsigned int *BlockWidth256BytesY,
262                 unsigned int *BlockWidth256BytesC,
263                 unsigned int *MacroTileHeightY,
264                 unsigned int *MacroTileHeightC,
265                 unsigned int *MacroTileWidthY,
266                 unsigned int *MacroTileWidthC)
267 {
268         if (SourcePixelFormat == dm_444_64) {
269                 *BytePerPixelDETY = 8;
270                 *BytePerPixelDETC = 0;
271                 *BytePerPixelY = 8;
272                 *BytePerPixelC = 0;
273         } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274                 *BytePerPixelDETY = 4;
275                 *BytePerPixelDETC = 0;
276                 *BytePerPixelY = 4;
277                 *BytePerPixelC = 0;
278         } else if (SourcePixelFormat == dm_444_16) {
279                 *BytePerPixelDETY = 2;
280                 *BytePerPixelDETC = 0;
281                 *BytePerPixelY = 2;
282                 *BytePerPixelC = 0;
283         } else if (SourcePixelFormat == dm_444_8) {
284                 *BytePerPixelDETY = 1;
285                 *BytePerPixelDETC = 0;
286                 *BytePerPixelY = 1;
287                 *BytePerPixelC = 0;
288         } else if (SourcePixelFormat == dm_rgbe_alpha) {
289                 *BytePerPixelDETY = 4;
290                 *BytePerPixelDETC = 1;
291                 *BytePerPixelY = 4;
292                 *BytePerPixelC = 1;
293         } else if (SourcePixelFormat == dm_420_8) {
294                 *BytePerPixelDETY = 1;
295                 *BytePerPixelDETC = 2;
296                 *BytePerPixelY = 1;
297                 *BytePerPixelC = 2;
298         } else if (SourcePixelFormat == dm_420_12) {
299                 *BytePerPixelDETY = 2;
300                 *BytePerPixelDETC = 4;
301                 *BytePerPixelY = 2;
302                 *BytePerPixelC = 4;
303         } else {
304                 *BytePerPixelDETY = 4.0 / 3;
305                 *BytePerPixelDETC = 8.0 / 3;
306                 *BytePerPixelY = 2;
307                 *BytePerPixelC = 4;
308         }
309 #ifdef __DML_VBA_DEBUG__
310         dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311         dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312         dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313         dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
314         dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
315 #endif
316         if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317                         || SourcePixelFormat == dm_444_16
318                         || SourcePixelFormat == dm_444_8
319                         || SourcePixelFormat == dm_mono_16
320                         || SourcePixelFormat == dm_mono_8
321                         || SourcePixelFormat == dm_rgbe)) {
322                 if (SurfaceTiling == dm_sw_linear)
323                         *BlockHeight256BytesY = 1;
324                 else if (SourcePixelFormat == dm_444_64)
325                         *BlockHeight256BytesY = 4;
326                 else if (SourcePixelFormat == dm_444_8)
327                         *BlockHeight256BytesY = 16;
328                 else
329                         *BlockHeight256BytesY = 8;
330
331                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332                 *BlockHeight256BytesC = 0;
333                 *BlockWidth256BytesC = 0;
334         } else {
335                 if (SurfaceTiling == dm_sw_linear) {
336                         *BlockHeight256BytesY = 1;
337                         *BlockHeight256BytesC = 1;
338                 } else if (SourcePixelFormat == dm_rgbe_alpha) {
339                         *BlockHeight256BytesY = 8;
340                         *BlockHeight256BytesC = 16;
341                 } else if (SourcePixelFormat == dm_420_8) {
342                         *BlockHeight256BytesY = 16;
343                         *BlockHeight256BytesC = 8;
344                 } else {
345                         *BlockHeight256BytesY = 8;
346                         *BlockHeight256BytesC = 8;
347                 }
348                 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349                 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
350         }
351 #ifdef __DML_VBA_DEBUG__
352         dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
353         dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354         dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
355         dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
356 #endif
357
358         if (SurfaceTiling == dm_sw_linear) {
359                 *MacroTileHeightY = *BlockHeight256BytesY;
360                 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361                 *MacroTileHeightC = *BlockHeight256BytesC;
362                 if (*MacroTileHeightC == 0)
363                         *MacroTileWidthC = 0;
364                 else
365                         *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366         } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367                         SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368                 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
369                 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370                 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
371                 if (*MacroTileHeightC == 0)
372                         *MacroTileWidthC = 0;
373                 else
374                         *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
375         } else {
376                 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
377                 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378                 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
379                 if (*MacroTileHeightC == 0)
380                         *MacroTileWidthC = 0;
381                 else
382                         *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
383         }
384
385 #ifdef __DML_VBA_DEBUG__
386         dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
387         dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388         dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
389         dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
390 #endif
391 } // CalculateBytePerPixelAndBlockSizes
392
393 void dml32_CalculateSwathAndDETConfiguration(
394                 unsigned int DETSizeOverride[],
395                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
396                 unsigned int ConfigReturnBufferSizeInKByte,
397                 unsigned int MaxTotalDETInKByte,
398                 unsigned int MinCompressedBufferSizeInKByte,
399                 double ForceSingleDPP,
400                 unsigned int NumberOfActiveSurfaces,
401                 unsigned int nomDETInKByte,
402                 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
403                 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
404                 unsigned int PixelChunkSizeKBytes,
405                 unsigned int ROBSizeKBytes,
406                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
407                 enum output_encoder_class Output[],
408                 double ReadBandwidthLuma[],
409                 double ReadBandwidthChroma[],
410                 double MaximumSwathWidthLuma[],
411                 double MaximumSwathWidthChroma[],
412                 enum dm_rotation_angle SourceRotation[],
413                 bool ViewportStationary[],
414                 enum source_format_class SourcePixelFormat[],
415                 enum dm_swizzle_mode SurfaceTiling[],
416                 unsigned int ViewportWidth[],
417                 unsigned int ViewportHeight[],
418                 unsigned int ViewportXStart[],
419                 unsigned int ViewportYStart[],
420                 unsigned int ViewportXStartC[],
421                 unsigned int ViewportYStartC[],
422                 unsigned int SurfaceWidthY[],
423                 unsigned int SurfaceWidthC[],
424                 unsigned int SurfaceHeightY[],
425                 unsigned int SurfaceHeightC[],
426                 unsigned int Read256BytesBlockHeightY[],
427                 unsigned int Read256BytesBlockHeightC[],
428                 unsigned int Read256BytesBlockWidthY[],
429                 unsigned int Read256BytesBlockWidthC[],
430                 enum odm_combine_mode ODMMode[],
431                 unsigned int BlendingAndTiming[],
432                 unsigned int BytePerPixY[],
433                 unsigned int BytePerPixC[],
434                 double BytePerPixDETY[],
435                 double BytePerPixDETC[],
436                 unsigned int HActive[],
437                 double HRatio[],
438                 double HRatioChroma[],
439                 unsigned int DPPPerSurface[],
440
441                 /* Output */
442                 unsigned int swath_width_luma_ub[],
443                 unsigned int swath_width_chroma_ub[],
444                 double SwathWidth[],
445                 double SwathWidthChroma[],
446                 unsigned int SwathHeightY[],
447                 unsigned int SwathHeightC[],
448                 unsigned int DETBufferSizeInKByte[],
449                 unsigned int DETBufferSizeY[],
450                 unsigned int DETBufferSizeC[],
451                 bool *UnboundedRequestEnabled,
452                 unsigned int *CompressedBufferSizeInkByte,
453                 unsigned int *CompBufReservedSpaceKBytes,
454                 bool *CompBufReservedSpaceNeedAdjustment,
455                 bool ViewportSizeSupportPerSurface[],
456                 bool *ViewportSizeSupport)
457 {
458         unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
459         unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
460         unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
461         unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
462         unsigned int RoundedUpSwathSizeBytesY;
463         unsigned int RoundedUpSwathSizeBytesC;
464         double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
465         double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
466         unsigned int k;
467         unsigned int TotalActiveDPP = 0;
468         bool NoChromaSurfaces = true;
469         unsigned int DETBufferSizeInKByteForSwathCalculation;
470
471 #ifdef __DML_VBA_DEBUG__
472         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
473         dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
474         dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
475 #endif
476         dml32_CalculateSwathWidth(ForceSingleDPP,
477                         NumberOfActiveSurfaces,
478                         SourcePixelFormat,
479                         SourceRotation,
480                         ViewportStationary,
481                         ViewportWidth,
482                         ViewportHeight,
483                         ViewportXStart,
484                         ViewportYStart,
485                         ViewportXStartC,
486                         ViewportYStartC,
487                         SurfaceWidthY,
488                         SurfaceWidthC,
489                         SurfaceHeightY,
490                         SurfaceHeightC,
491                         ODMMode,
492                         BytePerPixY,
493                         BytePerPixC,
494                         Read256BytesBlockHeightY,
495                         Read256BytesBlockHeightC,
496                         Read256BytesBlockWidthY,
497                         Read256BytesBlockWidthC,
498                         BlendingAndTiming,
499                         HActive,
500                         HRatio,
501                         DPPPerSurface,
502
503                         /* Output */
504                         SwathWidthdoubleDPP,
505                         SwathWidthdoubleDPPChroma,
506                         SwathWidth,
507                         SwathWidthChroma,
508                         MaximumSwathHeightY,
509                         MaximumSwathHeightC,
510                         swath_width_luma_ub,
511                         swath_width_chroma_ub);
512
513         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
514                 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
515                 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
516 #ifdef __DML_VBA_DEBUG__
517                 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
518                 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
519                 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
520                 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
521                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
522                                 RoundedUpMaxSwathSizeBytesY[k]);
523                 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
524                 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
525                 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
526                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
527                                 RoundedUpMaxSwathSizeBytesC[k]);
528 #endif
529
530                 if (SourcePixelFormat[k] == dm_420_10) {
531                         RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
532                         RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
533                 }
534         }
535
536         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
537                 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
538                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
539                                 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
540                         NoChromaSurfaces = false;
541                 }
542         }
543
544         // By default, just set the reserved space to 2 pixel chunks size
545         *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
546
547         // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
548         // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
549         // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
550         *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
551
552         if (*CompBufReservedSpaceNeedAdjustment == 1) {
553                 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
554         }
555
556         #ifdef __DML_VBA_DEBUG__
557                 dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
558                 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
559         #endif
560
561         *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
562
563         dml32_CalculateDETBufferSize(DETSizeOverride,
564                         UseMALLForPStateChange,
565                         ForceSingleDPP,
566                         NumberOfActiveSurfaces,
567                         *UnboundedRequestEnabled,
568                         nomDETInKByte,
569                         MaxTotalDETInKByte,
570                         ConfigReturnBufferSizeInKByte,
571                         MinCompressedBufferSizeInKByte,
572                         CompressedBufferSegmentSizeInkByteFinal,
573                         SourcePixelFormat,
574                         ReadBandwidthLuma,
575                         ReadBandwidthChroma,
576                         RoundedUpMaxSwathSizeBytesY,
577                         RoundedUpMaxSwathSizeBytesC,
578                         DPPPerSurface,
579
580                         /* Output */
581                         DETBufferSizeInKByte,    // per hubp pipe
582                         CompressedBufferSizeInkByte);
583
584 #ifdef __DML_VBA_DEBUG__
585         dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
586         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
587         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
588         dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
589         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
590         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
591 #endif
592
593         *ViewportSizeSupport = true;
594         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
595
596                 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
597                                 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
598 #ifdef __DML_VBA_DEBUG__
599                 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
600                                 DETBufferSizeInKByteForSwathCalculation);
601 #endif
602
603                 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
604                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605                         SwathHeightY[k] = MaximumSwathHeightY[k];
606                         SwathHeightC[k] = MaximumSwathHeightC[k];
607                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
608                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
609                 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
610                                 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
611                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
613                         SwathHeightC[k] = MaximumSwathHeightC[k];
614                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
615                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
616                 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
617                                 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
618                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
619                         SwathHeightY[k] = MaximumSwathHeightY[k];
620                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
621                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
622                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
623                 } else {
624                         SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
625                         SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
626                         RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
627                         RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
628                 }
629
630                 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
631                                 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
632                                 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
633                                                 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
634                         *ViewportSizeSupport = false;
635                         ViewportSizeSupportPerSurface[k] = false;
636                 } else {
637                         ViewportSizeSupportPerSurface[k] = true;
638                 }
639
640                 if (SwathHeightC[k] == 0) {
641 #ifdef __DML_VBA_DEBUG__
642                         dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
643 #endif
644                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
645                         DETBufferSizeC[k] = 0;
646                 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
647 #ifdef __DML_VBA_DEBUG__
648                         dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
649 #endif
650                         DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
651                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
652                 } else {
653 #ifdef __DML_VBA_DEBUG__
654                         dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
655 #endif
656                         DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
657                         DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
658                 }
659
660 #ifdef __DML_VBA_DEBUG__
661                 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
662                 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
663                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
664                                 k, RoundedUpMaxSwathSizeBytesY[k]);
665                 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
666                                 k, RoundedUpMaxSwathSizeBytesC[k]);
667                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
668                 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
669                 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
670                 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
671                 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
672                 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
673                                 ViewportSizeSupportPerSurface[k]);
674 #endif
675
676         }
677 } // CalculateSwathAndDETConfiguration
678
679 void dml32_CalculateSwathWidth(
680                 bool                            ForceSingleDPP,
681                 unsigned int                    NumberOfActiveSurfaces,
682                 enum source_format_class        SourcePixelFormat[],
683                 enum dm_rotation_angle          SourceRotation[],
684                 bool                            ViewportStationary[],
685                 unsigned int                    ViewportWidth[],
686                 unsigned int                    ViewportHeight[],
687                 unsigned int                    ViewportXStart[],
688                 unsigned int                    ViewportYStart[],
689                 unsigned int                    ViewportXStartC[],
690                 unsigned int                    ViewportYStartC[],
691                 unsigned int                    SurfaceWidthY[],
692                 unsigned int                    SurfaceWidthC[],
693                 unsigned int                    SurfaceHeightY[],
694                 unsigned int                    SurfaceHeightC[],
695                 enum odm_combine_mode           ODMMode[],
696                 unsigned int                    BytePerPixY[],
697                 unsigned int                    BytePerPixC[],
698                 unsigned int                    Read256BytesBlockHeightY[],
699                 unsigned int                    Read256BytesBlockHeightC[],
700                 unsigned int                    Read256BytesBlockWidthY[],
701                 unsigned int                    Read256BytesBlockWidthC[],
702                 unsigned int                    BlendingAndTiming[],
703                 unsigned int                    HActive[],
704                 double                          HRatio[],
705                 unsigned int                    DPPPerSurface[],
706
707                 /* Output */
708                 double                          SwathWidthdoubleDPPY[],
709                 double                          SwathWidthdoubleDPPC[],
710                 double                          SwathWidthY[], // per-pipe
711                 double                          SwathWidthC[], // per-pipe
712                 unsigned int                    MaximumSwathHeightY[],
713                 unsigned int                    MaximumSwathHeightC[],
714                 unsigned int                    swath_width_luma_ub[], // per-pipe
715                 unsigned int                    swath_width_chroma_ub[]) // per-pipe
716 {
717         unsigned int k, j;
718         enum odm_combine_mode MainSurfaceODMMode;
719
720         unsigned int surface_width_ub_l;
721         unsigned int surface_height_ub_l;
722         unsigned int surface_width_ub_c;
723         unsigned int surface_height_ub_c;
724
725 #ifdef __DML_VBA_DEBUG__
726         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
727         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
728 #endif
729
730         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
731                 if (!IsVertical(SourceRotation[k]))
732                         SwathWidthdoubleDPPY[k] = ViewportWidth[k];
733                 else
734                         SwathWidthdoubleDPPY[k] = ViewportHeight[k];
735
736 #ifdef __DML_VBA_DEBUG__
737                 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
738                 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
739 #endif
740
741                 MainSurfaceODMMode = ODMMode[k];
742                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
743                         if (BlendingAndTiming[k] == j)
744                                 MainSurfaceODMMode = ODMMode[j];
745                 }
746
747                 if (ForceSingleDPP) {
748                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
749                 } else {
750                         if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
751                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
752                                                 dml_round(HActive[k] / 4.0 * HRatio[k]));
753                         } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
754                                 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
755                                                 dml_round(HActive[k] / 2.0 * HRatio[k]));
756                         } else if (DPPPerSurface[k] == 2) {
757                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
758                         } else {
759                                 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
760                         }
761                 }
762
763 #ifdef __DML_VBA_DEBUG__
764                 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
765                 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
766                 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
767                 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
768                 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
769 #endif
770
771                 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
772                                 SourcePixelFormat[k] == dm_420_12) {
773                         SwathWidthC[k] = SwathWidthY[k] / 2;
774                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
775                 } else {
776                         SwathWidthC[k] = SwathWidthY[k];
777                         SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
778                 }
779
780                 if (ForceSingleDPP == true) {
781                         SwathWidthY[k] = SwathWidthdoubleDPPY[k];
782                         SwathWidthC[k] = SwathWidthdoubleDPPC[k];
783                 }
784
785                 surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
786                 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
787                 surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
788                 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
789
790 #ifdef __DML_VBA_DEBUG__
791                 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
792                 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
793                 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
794                 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
795                 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
796                 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
797                 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
798                 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
799                 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
800                 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
801 #endif
802
803                 if (!IsVertical(SourceRotation[k])) {
804                         MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
805                         MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
806                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
807                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
808                                                 dml_floor(ViewportXStart[k] +
809                                                                 SwathWidthY[k] +
810                                                                 Read256BytesBlockWidthY[k] - 1,
811                                                                 Read256BytesBlockWidthY[k]) -
812                                                                 dml_floor(ViewportXStart[k],
813                                                                 Read256BytesBlockWidthY[k]));
814                         } else {
815                                 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
816                                                 dml_ceil(SwathWidthY[k] - 1,
817                                                                 Read256BytesBlockWidthY[k]) +
818                                                                 Read256BytesBlockWidthY[k]);
819                         }
820                         if (BytePerPixC[k] > 0) {
821                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
822                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823                                                         dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
824                                                                         Read256BytesBlockWidthC[k] - 1,
825                                                                         Read256BytesBlockWidthC[k]) -
826                                                                         dml_floor(ViewportXStartC[k],
827                                                                         Read256BytesBlockWidthC[k]));
828                                 } else {
829                                         swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
830                                                         dml_ceil(SwathWidthC[k] - 1,
831                                                                 Read256BytesBlockWidthC[k]) +
832                                                                 Read256BytesBlockWidthC[k]);
833                                 }
834                         } else {
835                                 swath_width_chroma_ub[k] = 0;
836                         }
837                 } else {
838                         MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
839                         MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
840
841                         if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
842                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
843                                                 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
844                                                 Read256BytesBlockHeightY[k]) -
845                                                 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
846                         } else {
847                                 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
848                                                 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
849                         }
850                         if (BytePerPixC[k] > 0) {
851                                 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
852                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853                                                         dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
854                                                                         Read256BytesBlockHeightC[k] - 1,
855                                                                         Read256BytesBlockHeightC[k]) -
856                                                                         dml_floor(ViewportYStartC[k],
857                                                                                         Read256BytesBlockHeightC[k]));
858                                 } else {
859                                         swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
860                                                         dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
861                                                         Read256BytesBlockHeightC[k]);
862                                 }
863                         } else {
864                                 swath_width_chroma_ub[k] = 0;
865                         }
866                 }
867
868 #ifdef __DML_VBA_DEBUG__
869                 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
870                 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
871                 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
872                 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
873 #endif
874
875         }
876 } // CalculateSwathWidth
877
878 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
879                         unsigned int TotalNumberOfActiveDPP,
880                         bool NoChroma,
881                         enum output_encoder_class Output,
882                         enum dm_swizzle_mode SurfaceTiling,
883                         bool CompBufReservedSpaceNeedAdjustment,
884                         bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
885 {
886         bool ret_val = false;
887
888         ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
889                         TotalNumberOfActiveDPP == 1 && NoChroma);
890         if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
891                 ret_val = false;
892
893         if (SurfaceTiling == dm_sw_linear)
894                 ret_val = false;
895
896         if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
897                 ret_val = false;
898
899 #ifdef __DML_VBA_DEBUG__
900         dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
901         dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
902         dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
903 #endif
904
905         return (ret_val);
906 }
907
908 void dml32_CalculateDETBufferSize(
909                 unsigned int DETSizeOverride[],
910                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
911                 bool ForceSingleDPP,
912                 unsigned int NumberOfActiveSurfaces,
913                 bool UnboundedRequestEnabled,
914                 unsigned int nomDETInKByte,
915                 unsigned int MaxTotalDETInKByte,
916                 unsigned int ConfigReturnBufferSizeInKByte,
917                 unsigned int MinCompressedBufferSizeInKByte,
918                 unsigned int CompressedBufferSegmentSizeInkByteFinal,
919                 enum source_format_class SourcePixelFormat[],
920                 double ReadBandwidthLuma[],
921                 double ReadBandwidthChroma[],
922                 unsigned int RoundedUpMaxSwathSizeBytesY[],
923                 unsigned int RoundedUpMaxSwathSizeBytesC[],
924                 unsigned int DPPPerSurface[],
925                 /* Output */
926                 unsigned int DETBufferSizeInKByte[],
927                 unsigned int *CompressedBufferSizeInkByte)
928 {
929         unsigned int DETBufferSizePoolInKByte;
930         unsigned int NextDETBufferPieceInKByte;
931         bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
932         bool NextPotentialSurfaceToAssignDETPieceFound;
933         unsigned int NextSurfaceToAssignDETPiece;
934         double TotalBandwidth;
935         double BandwidthOfSurfacesNotAssignedDETPiece;
936         unsigned int max_minDET;
937         unsigned int minDET;
938         unsigned int minDET_pipe;
939         unsigned int j, k;
940
941 #ifdef __DML_VBA_DEBUG__
942         dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
943         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
944         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
945         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
946         dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
947         dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
948         dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
949         dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
950                         CompressedBufferSegmentSizeInkByteFinal);
951 #endif
952
953         // Note: Will use default det size if that fits 2 swaths
954         if (UnboundedRequestEnabled) {
955                 if (DETSizeOverride[0] > 0) {
956                         DETBufferSizeInKByte[0] = DETSizeOverride[0];
957                 } else {
958                         DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
959                                         ((double) RoundedUpMaxSwathSizeBytesY[0] +
960                                                         (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
961                 }
962                 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
963         } else {
964                 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
965                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
966                         DETBufferSizeInKByte[k] = nomDETInKByte;
967                         if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
968                                         SourcePixelFormat[k] == dm_420_12) {
969                                 max_minDET = nomDETInKByte - 64;
970                         } else {
971                                 max_minDET = nomDETInKByte;
972                         }
973                         minDET = 128;
974                         minDET_pipe = 0;
975
976                         // add DET resource until can hold 2 full swaths
977                         while (minDET <= max_minDET && minDET_pipe == 0) {
978                                 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
979                                                 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
980                                         minDET_pipe = minDET;
981                                 minDET = minDET + 64;
982                         }
983
984 #ifdef __DML_VBA_DEBUG__
985                         dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
986                         dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
987                         dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
988                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
989                                         RoundedUpMaxSwathSizeBytesY[k]);
990                         dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
991                                         RoundedUpMaxSwathSizeBytesC[k]);
992 #endif
993
994                         if (minDET_pipe == 0) {
995                                 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
996                                                 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
997 #ifdef __DML_VBA_DEBUG__
998                                 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
999                                                 __func__, k, minDET_pipe);
1000 #endif
1001                         }
1002
1003                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1004                                 DETBufferSizeInKByte[k] = 0;
1005                         } else if (DETSizeOverride[k] > 0) {
1006                                 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1007                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1008                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1009                         } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1010                                 DETBufferSizeInKByte[k] = minDET_pipe;
1011                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1012                                                 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1013                         }
1014
1015 #ifdef __DML_VBA_DEBUG__
1016                         dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1017                         dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1018                         dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1019                         dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1020 #endif
1021                 }
1022
1023                 TotalBandwidth = 0;
1024                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1025                         if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1026                                 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1027                 }
1028 #ifdef __DML_VBA_DEBUG__
1029                 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1030                 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1031                         dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1032                 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1033                 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1034 #endif
1035                 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1036                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1037
1038                         if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1039                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1040                         } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1041                                         (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1042                                         ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1043                                 DETPieceAssignedToThisSurfaceAlready[k] = true;
1044                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1045                                                 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1046                         } else {
1047                                 DETPieceAssignedToThisSurfaceAlready[k] = false;
1048                         }
1049 #ifdef __DML_VBA_DEBUG__
1050                         dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1051                                         DETPieceAssignedToThisSurfaceAlready[k]);
1052                         dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1053                                         BandwidthOfSurfacesNotAssignedDETPiece);
1054 #endif
1055                 }
1056
1057                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1058                         NextPotentialSurfaceToAssignDETPieceFound = false;
1059                         NextSurfaceToAssignDETPiece = 0;
1060
1061                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1062 #ifdef __DML_VBA_DEBUG__
1063                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1064                                                 ReadBandwidthLuma[k]);
1065                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1066                                                 ReadBandwidthChroma[k]);
1067                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1068                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1069                                 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1070                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1071                                 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1072                                                 NextSurfaceToAssignDETPiece);
1073 #endif
1074                                 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1075                                                 (!NextPotentialSurfaceToAssignDETPieceFound ||
1076                                                 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1077                                                 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1078                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1079                                         NextSurfaceToAssignDETPiece = k;
1080                                         NextPotentialSurfaceToAssignDETPieceFound = true;
1081                                 }
1082 #ifdef __DML_VBA_DEBUG__
1083                                 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1084                                                 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1085                                 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1086                                                 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1087 #endif
1088                         }
1089
1090                         if (NextPotentialSurfaceToAssignDETPieceFound) {
1091                                 // Note: To show the banker's rounding behavior in VBA and also the fact
1092                                 // that the DET buffer size varies due to precision issue
1093                                 //
1094                                 //double tmp1 =  ((double) DETBufferSizePoolInKByte *
1095                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1096                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1097                                 // BandwidthOfSurfacesNotAssignedDETPiece /
1098                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1099                                 //double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1100                                 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1101                                 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1102                                  //BandwidthOfSurfacesNotAssignedDETPiece /
1103                                 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1104                                 //
1105                                 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1106                                 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1107
1108                                 NextDETBufferPieceInKByte = dml_min(
1109                                         dml_round((double) DETBufferSizePoolInKByte *
1110                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1111                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1112                                                 BandwidthOfSurfacesNotAssignedDETPiece /
1113                                                 ((ForceSingleDPP ? 1 :
1114                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1115                                                 (ForceSingleDPP ? 1 :
1116                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1117                                                 dml_floor((double) DETBufferSizePoolInKByte,
1118                                                 (ForceSingleDPP ? 1 :
1119                                                                 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1120
1121                                 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1122                                 // We should limit the per-pipe DET size to the nominal / max per pipe.
1123                                 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1124                                         if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1125                                                         nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1126                                                 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1127                                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1128                                         } else {
1129                                                 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1130                                                 // already has the max per-pipe value
1131                                                 NextDETBufferPieceInKByte = 0;
1132                                         }
1133                                 }
1134
1135 #ifdef __DML_VBA_DEBUG__
1136                                 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1137                                         DETBufferSizePoolInKByte);
1138                                 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1139                                         NextSurfaceToAssignDETPiece);
1140                                 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1141                                         NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1142                                 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1143                                         NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1144                                 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1145                                         __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1146                                 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1147                                         NextDETBufferPieceInKByte);
1148                                 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1149                                         __func__, j, NextSurfaceToAssignDETPiece,
1150                                         DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1151 #endif
1152
1153                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1154                                                 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1155                                                 + NextDETBufferPieceInKByte
1156                                                 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1157 #ifdef __DML_VBA_DEBUG__
1158                                 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1159 #endif
1160
1161                                 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1162                                 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1163                                 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1164                                                 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1165                                                                 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1166                         }
1167                 }
1168                 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1169         }
1170         *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1171
1172 #ifdef __DML_VBA_DEBUG__
1173         dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1174         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1175         for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1176                 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1177                                 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1178         }
1179 #endif
1180 } // CalculateDETBufferSize
1181
1182 void dml32_CalculateODMMode(
1183                 unsigned int MaximumPixelsPerLinePerDSCUnit,
1184                 unsigned int HActive,
1185                 enum output_encoder_class Output,
1186                 enum odm_combine_policy ODMUse,
1187                 double StateDispclk,
1188                 double MaxDispclk,
1189                 bool DSCEnable,
1190                 unsigned int TotalNumberOfActiveDPP,
1191                 unsigned int MaxNumDPP,
1192                 double PixelClock,
1193                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194                 double DISPCLKRampingMargin,
1195                 double DISPCLKDPPCLKVCOSpeed,
1196                 unsigned int NumberOfDSCSlices,
1197
1198                 /* Output */
1199                 bool *TotalAvailablePipesSupport,
1200                 unsigned int *NumberOfDPP,
1201                 enum odm_combine_mode *ODMMode,
1202                 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205         double SurfaceRequiredDISPCLKWithoutODMCombine;
1206         double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207         double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209         SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211                         MaxDispclk);
1212         SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214                         MaxDispclk);
1215         SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216                         PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217                         MaxDispclk);
1218         *TotalAvailablePipesSupport = true;
1219         *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221         if (ODMUse == dm_odm_combine_policy_none)
1222                 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225         *NumberOfDPP = 0;
1226
1227         // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228         // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230         if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231                         ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232                                         (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233                                         || NumberOfDSCSlices > 8)))) {
1234                 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235                         *ODMMode = dm_odm_combine_mode_4to1;
1236                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237                         *NumberOfDPP = 4;
1238                 } else {
1239                         *TotalAvailablePipesSupport = false;
1240                 }
1241         } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242                         (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243                                         SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244                                         (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245                                         || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246                 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247                         *ODMMode = dm_odm_combine_mode_2to1;
1248                         *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249                         *NumberOfDPP = 2;
1250                 } else {
1251                         *TotalAvailablePipesSupport = false;
1252                 }
1253         } else {
1254                 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255                         *NumberOfDPP = 1;
1256                 else
1257                         *TotalAvailablePipesSupport = false;
1258         }
1259 }
1260
1261 double dml32_CalculateRequiredDispclk(
1262                 enum odm_combine_mode ODMMode,
1263                 double PixelClock,
1264                 double DISPCLKDPPCLKDSCCLKDownSpreading,
1265                 double DISPCLKRampingMargin,
1266                 double DISPCLKDPPCLKVCOSpeed,
1267                 double MaxDispclk)
1268 {
1269         double RequiredDispclk = 0.;
1270         double PixelClockAfterODM;
1271         double DISPCLKWithRampingRoundedToDFSGranularity;
1272         double DISPCLKWithoutRampingRoundedToDFSGranularity;
1273         double MaxDispclkRoundedDownToDFSGranularity;
1274
1275         if (ODMMode == dm_odm_combine_mode_4to1)
1276                 PixelClockAfterODM = PixelClock / 4;
1277         else if (ODMMode == dm_odm_combine_mode_2to1)
1278                 PixelClockAfterODM = PixelClock / 2;
1279         else
1280                 PixelClockAfterODM = PixelClock;
1281
1282
1283         DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1284                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1285                                         * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1286
1287         DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1288                         PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1289
1290         MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1291
1292         if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1293                 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1294         else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1295                 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1296         else
1297                 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1298
1299         return RequiredDispclk;
1300 }
1301
/*
 * Snap Clock to a value of the form (VCOSpeed * 4) / N.
 *
 * N is the quotient (VCOSpeed * 4) / Clock passed through dml_floor() when
 * round_up is set, or through dml_ceil() otherwise, both with a granularity
 * argument of 1.0.  Assuming those helpers round down/up to a whole number,
 * a smaller N yields a clock at or above Clock and a larger N a clock at or
 * below it.
 *
 * Returns 0.0 when Clock is zero or negative.
 *
 * NOTE(review): with round_up set and Clock greater than VCOSpeed * 4 the
 * divider presumably becomes 0 and the result is a division by zero; confirm
 * callers never pass such a clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	Divider = round_up ?
			dml_floor(VCOSpeed * 4.0 / Clock, 1.0) :
			dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1312
1313 void dml32_CalculateOutputLink(
1314                 double PHYCLKPerState,
1315                 double PHYCLKD18PerState,
1316                 double PHYCLKD32PerState,
1317                 double Downspreading,
1318                 bool IsMainSurfaceUsingTheIndicatedTiming,
1319                 enum output_encoder_class Output,
1320                 enum output_format_class OutputFormat,
1321                 unsigned int HTotal,
1322                 unsigned int HActive,
1323                 double PixelClockBackEnd,
1324                 double ForcedOutputLinkBPP,
1325                 unsigned int DSCInputBitPerComponent,
1326                 unsigned int NumberOfDSCSlices,
1327                 double AudioSampleRate,
1328                 unsigned int AudioSampleLayout,
1329                 enum odm_combine_mode ODMModeNoDSC,
1330                 enum odm_combine_mode ODMModeDSC,
1331                 bool DSCEnable,
1332                 unsigned int OutputLinkDPLanes,
1333                 enum dm_output_link_dp_rate OutputLinkDPRate,
1334
1335                 /* Output */
1336                 bool *RequiresDSC,
1337                 double *RequiresFEC,
1338                 double  *OutBpp,
1339                 enum dm_output_type *OutputType,
1340                 enum dm_output_rate *OutputRate,
1341                 unsigned int *RequiredSlots)
1342 {
1343         bool LinkDSCEnable;
1344         unsigned int dummy;
1345         *RequiresDSC = false;
1346         *RequiresFEC = false;
1347         *OutBpp = 0;
1348         *OutputType = dm_output_type_unknown;
1349         *OutputRate = dm_output_rate_unknown;
1350
1351         if (IsMainSurfaceUsingTheIndicatedTiming) {
1352                 if (Output == dm_hdmi) {
1353                         *RequiresDSC = false;
1354                         *RequiresFEC = false;
1355                         *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1356                                         PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1357                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1358                                         ODMModeNoDSC, ODMModeDSC, &dummy);
1359                         //OutputTypeAndRate = "HDMI";
1360                         *OutputType = dm_output_type_hdmi;
1361
1362                 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1363                         if (DSCEnable == true) {
1364                                 *RequiresDSC = true;
1365                                 LinkDSCEnable = true;
1366                                 if (Output == dm_dp || Output == dm_dp2p0)
1367                                         *RequiresFEC = true;
1368                                 else
1369                                         *RequiresFEC = false;
1370                         } else {
1371                                 *RequiresDSC = false;
1372                                 LinkDSCEnable = false;
1373                                 if (Output == dm_dp2p0)
1374                                         *RequiresFEC = true;
1375                                 else
1376                                         *RequiresFEC = false;
1377                         }
1378                         if (Output == dm_dp2p0) {
1379                                 *OutBpp = 0;
1380                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1381                                                 PHYCLKD32PerState >= 10000 / 32) {
1382                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1383                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1384                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1385                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1386                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1387                                         if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1388                                                         ForcedOutputLinkBPP == 0) {
1389                                                 *RequiresDSC = true;
1390                                                 LinkDSCEnable = true;
1391                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1392                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1393                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1394                                                                 OutputFormat, DSCInputBitPerComponent,
1395                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1396                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1397                                         }
1398                                         //OutputTypeAndRate = Output & " UHBR10";
1399                                         *OutputType = dm_output_type_dp2p0;
1400                                         *OutputRate = dm_output_rate_dp_rate_uhbr10;
1401                                 }
1402                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1403                                                 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1404                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1405                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1406                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1407                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1408                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1409
1410                                         if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1411                                                         ForcedOutputLinkBPP == 0) {
1412                                                 *RequiresDSC = true;
1413                                                 LinkDSCEnable = true;
1414                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1415                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417                                                                 OutputFormat, DSCInputBitPerComponent,
1418                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420                                         }
1421                                         //OutputTypeAndRate = Output & " UHBR13p5";
1422                                         *OutputType = dm_output_type_dp2p0;
1423                                         *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1424                                 }
1425                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1426                                                 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1427                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1428                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1433                                                 *RequiresDSC = true;
1434                                                 LinkDSCEnable = true;
1435                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1436                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1437                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1438                                                                 OutputFormat, DSCInputBitPerComponent,
1439                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1440                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1441                                         }
1442                                         //OutputTypeAndRate = Output & " UHBR20";
1443                                         *OutputType = dm_output_type_dp2p0;
1444                                         *OutputRate = dm_output_rate_dp_rate_uhbr20;
1445                                 }
1446                         } else {
1447                                 *OutBpp = 0;
1448                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1449                                                 PHYCLKPerState >= 270) {
1450                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1451                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455                                         if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1456                                                         ForcedOutputLinkBPP == 0) {
1457                                                 *RequiresDSC = true;
1458                                                 LinkDSCEnable = true;
1459                                                 if (Output == dm_dp)
1460                                                         *RequiresFEC = true;
1461                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1462                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1463                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1464                                                                 OutputFormat, DSCInputBitPerComponent,
1465                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1466                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1467                                         }
1468                                         //OutputTypeAndRate = Output & " HBR";
1469                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1470                                         *OutputRate = dm_output_rate_dp_rate_hbr;
1471                                 }
1472                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1473                                                 *OutBpp == 0 && PHYCLKPerState >= 540) {
1474                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1475                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1476                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1477                                                         DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1478                                                         AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1479
1480                                         if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1481                                                         ForcedOutputLinkBPP == 0) {
1482                                                 *RequiresDSC = true;
1483                                                 LinkDSCEnable = true;
1484                                                 if (Output == dm_dp)
1485                                                         *RequiresFEC = true;
1486
1487                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1488                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1489                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1490                                                                 OutputFormat, DSCInputBitPerComponent,
1491                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1492                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1493                                         }
1494                                         //OutputTypeAndRate = Output & " HBR2";
1495                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1496                                         *OutputRate = dm_output_rate_dp_rate_hbr2;
1497                                 }
1498                                 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1499                                         *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1500                                                         OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1501                                                         ForcedOutputLinkBPP, LinkDSCEnable, Output,
1502                                                         OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1503                                                         AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1504                                                         RequiredSlots);
1505
1506                                         if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1507                                                 *RequiresDSC = true;
1508                                                 LinkDSCEnable = true;
1509                                                 if (Output == dm_dp)
1510                                                         *RequiresFEC = true;
1511
1512                                                 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1513                                                                 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1514                                                                 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1515                                                                 OutputFormat, DSCInputBitPerComponent,
1516                                                                 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1517                                                                 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1518                                         }
1519                                         //OutputTypeAndRate = Output & " HBR3";
1520                                         *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1521                                         *OutputRate = dm_output_rate_dp_rate_hbr3;
1522                                 }
1523                         }
1524                 }
1525         }
1526 }
1527
/*
 * Derive the global DPPCLK and the per-surface DPPCLK values.
 *
 * Pass 1: each surface's raw clock is DPPCLKUsingSingleDPP[k] divided by
 *         DPPPerSurface[k], scaled up by the down-spreading percentage; the
 *         largest raw clock is tracked.
 * Then:   that maximum is rounded up with dml32_RoundToDFSGranularity() and
 *         stored in *GlobalDPPCLK.
 * Pass 2: each surface's clock is replaced by GlobalDPPCLK / 255 times the
 *         dml_ceil() of (raw clock * 255 / GlobalDPPCLK), i.e. it is
 *         expressed in steps of 1/255 of the global clock.
 *
 * With NumberOfActiveSurfaces == 0 nothing is written to Dppclk[] and
 * *GlobalDPPCLK receives the rounding helper's result for 0.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	double HighestDppclk = 0;
	unsigned int Surface;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		Dppclk[Surface] = DPPCLKUsingSingleDPP[Surface] / DPPPerSurface[Surface]
				* (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		HighestDppclk = dml_max(HighestDppclk, Dppclk[Surface]);
	}

	HighestDppclk = dml32_RoundToDFSGranularity(HighestDppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = HighestDppclk;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		Dppclk[Surface] = HighestDppclk / 255
				* dml_ceil(Dppclk[Surface] * 255.0 / HighestDppclk, 1.0);
	}
}
1549
1550 double dml32_TruncToValidBPP(
1551                 double LinkBitRate,
1552                 unsigned int Lanes,
1553                 unsigned int HTotal,
1554                 unsigned int HActive,
1555                 double PixelClock,
1556                 double DesiredBPP,
1557                 bool DSCEnable,
1558                 enum output_encoder_class Output,
1559                 enum output_format_class Format,
1560                 unsigned int DSCInputBitPerComponent,
1561                 unsigned int DSCSlices,
1562                 unsigned int AudioRate,
1563                 unsigned int AudioLayout,
1564                 enum odm_combine_mode ODMModeNoDSC,
1565                 enum odm_combine_mode ODMModeDSC,
1566                 /* Output */
1567                 unsigned int *RequiredSlots)
1568 {
1569         double    MaxLinkBPP;
1570         unsigned int   MinDSCBPP;
1571         double    MaxDSCBPP;
1572         unsigned int   NonDSCBPP0;
1573         unsigned int   NonDSCBPP1;
1574         unsigned int   NonDSCBPP2;
1575         unsigned int   NonDSCBPP3;
1576
1577         if (Format == dm_420) {
1578                 NonDSCBPP0 = 12;
1579                 NonDSCBPP1 = 15;
1580                 NonDSCBPP2 = 18;
1581                 MinDSCBPP = 6;
1582                 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1583         } else if (Format == dm_444) {
1584                 NonDSCBPP0 = 18;
1585                 NonDSCBPP1 = 24;
1586                 NonDSCBPP2 = 30;
1587                 NonDSCBPP3 = 36;
1588                 MinDSCBPP = 8;
1589                 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1590         } else {
1591                 if (Output == dm_hdmi) {
1592                         NonDSCBPP0 = 24;
1593                         NonDSCBPP1 = 24;
1594                         NonDSCBPP2 = 24;
1595                 } else {
1596                         NonDSCBPP0 = 16;
1597                         NonDSCBPP1 = 20;
1598                         NonDSCBPP2 = 24;
1599                 }
1600                 if (Format == dm_n422) {
1601                         MinDSCBPP = 7;
1602                         MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1603                 } else {
1604                         MinDSCBPP = 8;
1605                         MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1606                 }
1607         }
1608         if (Output == dm_dp2p0) {
1609                 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1610         } else if (DSCEnable && Output == dm_dp) {
1611                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1612         } else {
1613                 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1614         }
1615
1616         if (DSCEnable) {
1617                 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1618                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1619                 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1620                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1621                 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1622                         MaxLinkBPP = 2 * MaxLinkBPP;
1623         } else {
1624                 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1625                         MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1626                 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1627                         MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1628                 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1629                         MaxLinkBPP = 2 * MaxLinkBPP;
1630         }
1631
1632         if (DesiredBPP == 0) {
1633                 if (DSCEnable) {
1634                         if (MaxLinkBPP < MinDSCBPP)
1635                                 return BPP_INVALID;
1636                         else if (MaxLinkBPP >= MaxDSCBPP)
1637                                 return MaxDSCBPP;
1638                         else
1639                                 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1640                 } else {
1641                         if (MaxLinkBPP >= NonDSCBPP3)
1642                                 return NonDSCBPP3;
1643                         else if (MaxLinkBPP >= NonDSCBPP2)
1644                                 return NonDSCBPP2;
1645                         else if (MaxLinkBPP >= NonDSCBPP1)
1646                                 return NonDSCBPP1;
1647                         else if (MaxLinkBPP >= NonDSCBPP0)
1648                                 return 16.0;
1649                         else
1650                                 return BPP_INVALID;
1651                 }
1652         } else {
1653                 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1654                                 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1655                                 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1656                         return BPP_INVALID;
1657                 else
1658                         return DesiredBPP;
1659         }
1660
1661         *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1662
1663         return BPP_INVALID;
1664 } // TruncToValidBPP
1665
1666 double dml32_RequiredDTBCLK(
1667                 bool              DSCEnable,
1668                 double               PixelClock,
1669                 enum output_format_class  OutputFormat,
1670                 double               OutputBpp,
1671                 unsigned int              DSCSlices,
1672                 unsigned int                 HTotal,
1673                 unsigned int                 HActive,
1674                 unsigned int              AudioRate,
1675                 unsigned int              AudioLayout)
1676 {
1677         double PixelWordRate;
1678         double HCActive;
1679         double HCBlank;
1680         double AverageTribyteRate;
1681         double HActiveTribyteRate;
1682
1683         if (DSCEnable != true)
1684                 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1685
1686         PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1687         HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1688                         dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1689         HCBlank = 64 + 32 *
1690                         dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1691         AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1692         HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1693         return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1694 }
1695
1696 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1697                 enum odm_combine_mode ODMMode,
1698                 unsigned int DSCInputBitPerComponent,
1699                 double OutputBpp,
1700                 unsigned int HActive,
1701                 unsigned int HTotal,
1702                 unsigned int NumberOfDSCSlices,
1703                 enum output_format_class  OutputFormat,
1704                 enum output_encoder_class Output,
1705                 double PixelClock,
1706                 double PixelClockBackEnd)
1707 {
1708         unsigned int DSCDelayRequirement_val;
1709
1710         if (DSCEnabled == true && OutputBpp != 0) {
1711                 if (ODMMode == dm_odm_combine_mode_4to1) {
1712                         DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1713                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1714                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1715                 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1716                         DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1717                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1718                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1719                 } else {
1720                         DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1721                                         dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1722                                         OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1723                 }
1724
1725                 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1726                                 dml_ceil(DSCDelayRequirement_val / HActive, 1);
1727
1728                 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1729
1730         } else {
1731                 DSCDelayRequirement_val = 0;
1732         }
1733
1734 #ifdef __DML_VBA_DEBUG__
1735         dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1736         dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1737         dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1738         dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1739         dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1740         dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1741         dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1742 #endif
1743
1744         return DSCDelayRequirement_val;
1745 }
1746
1747 void dml32_CalculateSurfaceSizeInMall(
1748                 unsigned int NumberOfActiveSurfaces,
1749                 unsigned int MALLAllocatedForDCN,
1750                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1751                 bool DCCEnable[],
1752                 bool ViewportStationary[],
1753                 unsigned int ViewportXStartY[],
1754                 unsigned int ViewportYStartY[],
1755                 unsigned int ViewportXStartC[],
1756                 unsigned int ViewportYStartC[],
1757                 unsigned int ViewportWidthY[],
1758                 unsigned int ViewportHeightY[],
1759                 unsigned int BytesPerPixelY[],
1760                 unsigned int ViewportWidthC[],
1761                 unsigned int ViewportHeightC[],
1762                 unsigned int BytesPerPixelC[],
1763                 unsigned int SurfaceWidthY[],
1764                 unsigned int SurfaceWidthC[],
1765                 unsigned int SurfaceHeightY[],
1766                 unsigned int SurfaceHeightC[],
1767                 unsigned int Read256BytesBlockWidthY[],
1768                 unsigned int Read256BytesBlockWidthC[],
1769                 unsigned int Read256BytesBlockHeightY[],
1770                 unsigned int Read256BytesBlockHeightC[],
1771                 unsigned int ReadBlockWidthY[],
1772                 unsigned int ReadBlockWidthC[],
1773                 unsigned int ReadBlockHeightY[],
1774                 unsigned int ReadBlockHeightC[],
1775
1776                 /* Output */
1777                 unsigned int    SurfaceSizeInMALL[],
1778                 bool *ExceededMALLSize)
1779 {
1780         unsigned int TotalSurfaceSizeInMALL  = 0;
1781         unsigned int k;
1782
1783         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1784                 if (ViewportStationary[k]) {
1785                         SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1786                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1787                                                 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1788                                                 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1789                                                 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1790                                                 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1791                                                 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1792
1793                         if (ReadBlockWidthC[k] > 0) {
1794                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1795                                                 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1796                                                         dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1797                                                         ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1798                                                         dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1799                                                         dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1800                                                         dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1801                                                         ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1802                                                         dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1803                                                         BytesPerPixelC[k];
1804                         }
1805                         if (DCCEnable[k] == true) {
1806                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1807                                                 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1808                                                         dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1809                                                         Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1810                                                         - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1811                                                         * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1812                                                         Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1813                                                         ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1814                                                         Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1815                                                         * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1816                                 if (Read256BytesBlockWidthC[k] > 0) {
1817                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818                                                         dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1819                                                                 Read256BytesBlockWidthC[k]),
1820                                                                 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1821                                                                 * Read256BytesBlockWidthC[k] - 1, 8 *
1822                                                                 Read256BytesBlockWidthC[k]) -
1823                                                                 dml_floor(ViewportXStartC[k], 8 *
1824                                                                 Read256BytesBlockWidthC[k])) *
1825                                                                 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1826                                                                 Read256BytesBlockHeightC[k]),
1827                                                                 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1828                                                                 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1829                                                                 Read256BytesBlockHeightC[k]) -
1830                                                                 dml_floor(ViewportYStartC[k], 8 *
1831                                                                 Read256BytesBlockHeightC[k])) *
1832                                                                 BytesPerPixelC[k] / 256;
1833                                 }
1834                         }
1835                 } else {
1836                         SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1837                                         ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1838                                         dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1839                                                         ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1840                                                         BytesPerPixelY[k];
1841                         if (ReadBlockWidthC[k] > 0) {
1842                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1843                                                 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1844                                                                 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1845                                                 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1846                                                                 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1847                                                                 BytesPerPixelC[k];
1848                         }
1849                         if (DCCEnable[k] == true) {
1850                                 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1851                                                 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1852                                                                 Read256BytesBlockWidthY[k] - 1), 8 *
1853                                                                 Read256BytesBlockWidthY[k]) *
1854                                                 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1855                                                                 Read256BytesBlockHeightY[k] - 1), 8 *
1856                                                                 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1857
1858                                 if (Read256BytesBlockWidthC[k] > 0) {
1859                                         SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1860                                                         dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1861                                                                         Read256BytesBlockWidthC[k] - 1), 8 *
1862                                                                         Read256BytesBlockWidthC[k]) *
1863                                                         dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1864                                                                         Read256BytesBlockHeightC[k] - 1), 8 *
1865                                                                         Read256BytesBlockHeightC[k]) *
1866                                                                         BytesPerPixelC[k] / 256;
1867                                 }
1868                         }
1869                 }
1870         }
1871
1872         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1873                 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1874                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1875         }
1876         *ExceededMALLSize =  (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1877 } // CalculateSurfaceSizeInMall
1878
1879 void dml32_CalculateVMRowAndSwath(
1880                 unsigned int NumberOfActiveSurfaces,
1881                 DmlPipe myPipe[],
1882                 unsigned int SurfaceSizeInMALL[],
1883                 unsigned int PTEBufferSizeInRequestsLuma,
1884                 unsigned int PTEBufferSizeInRequestsChroma,
1885                 unsigned int DCCMetaBufferSizeBytes,
1886                 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1887                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1888                 unsigned int MALLAllocatedForDCN,
1889                 double SwathWidthY[],
1890                 double SwathWidthC[],
1891                 bool GPUVMEnable,
1892                 bool HostVMEnable,
1893                 unsigned int HostVMMaxNonCachedPageTableLevels,
1894                 unsigned int GPUVMMaxPageTableLevels,
1895                 unsigned int GPUVMMinPageSizeKBytes[],
1896                 unsigned int HostVMMinPageSize,
1897
1898                 /* Output */
1899                 bool PTEBufferSizeNotExceeded[],
1900                 bool DCCMetaBufferSizeNotExceeded[],
1901                 unsigned int dpte_row_width_luma_ub[],
1902                 unsigned int dpte_row_width_chroma_ub[],
1903                 unsigned int dpte_row_height_luma[],
1904                 unsigned int dpte_row_height_chroma[],
1905                 unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1906                 unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1907                 unsigned int meta_req_width[],
1908                 unsigned int meta_req_width_chroma[],
1909                 unsigned int meta_req_height[],
1910                 unsigned int meta_req_height_chroma[],
1911                 unsigned int meta_row_width[],
1912                 unsigned int meta_row_width_chroma[],
1913                 unsigned int meta_row_height[],
1914                 unsigned int meta_row_height_chroma[],
1915                 unsigned int vm_group_bytes[],
1916                 unsigned int dpte_group_bytes[],
1917                 unsigned int PixelPTEReqWidthY[],
1918                 unsigned int PixelPTEReqHeightY[],
1919                 unsigned int PTERequestSizeY[],
1920                 unsigned int PixelPTEReqWidthC[],
1921                 unsigned int PixelPTEReqHeightC[],
1922                 unsigned int PTERequestSizeC[],
1923                 unsigned int dpde0_bytes_per_frame_ub_l[],
1924                 unsigned int meta_pte_bytes_per_frame_ub_l[],
1925                 unsigned int dpde0_bytes_per_frame_ub_c[],
1926                 unsigned int meta_pte_bytes_per_frame_ub_c[],
1927                 double PrefetchSourceLinesY[],
1928                 double PrefetchSourceLinesC[],
1929                 double VInitPreFillY[],
1930                 double VInitPreFillC[],
1931                 unsigned int MaxNumSwathY[],
1932                 unsigned int MaxNumSwathC[],
1933                 double meta_row_bw[],
1934                 double dpte_row_bw[],
1935                 double PixelPTEBytesPerRow[],
1936                 double PDEAndMetaPTEBytesFrame[],
1937                 double MetaRowByte[],
1938                 bool use_one_row_for_frame[],
1939                 bool use_one_row_for_frame_flip[],
1940                 bool UsesMALLForStaticScreen[],
1941                 bool PTE_BUFFER_MODE[],
1942                 unsigned int BIGK_FRAGMENT_SIZE[])
1943 {
1944         unsigned int k;
1945         unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1946         unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1947         unsigned int PDEAndMetaPTEBytesFrameY;
1948         unsigned int PDEAndMetaPTEBytesFrameC;
1949         unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1950         unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1951         unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1952         unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1953         unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1954         unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1955         unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1956         unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1957         unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1958         unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1959         bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1960
1961         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1962                 if (HostVMEnable == true) {
1963                         vm_group_bytes[k] = 512;
1964                         dpte_group_bytes[k] = 512;
1965                 } else if (GPUVMEnable == true) {
1966                         vm_group_bytes[k] = 2048;
1967                         if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1968                                 dpte_group_bytes[k] = 512;
1969                         else
1970                                 dpte_group_bytes[k] = 2048;
1971                 } else {
1972                         vm_group_bytes[k] = 0;
1973                         dpte_group_bytes[k] = 0;
1974                 }
1975
1976                 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1977                                 myPipe[k].SourcePixelFormat == dm_420_12 ||
1978                                 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
1979                         if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1980                                         !IsVertical(myPipe[k].SourceRotation)) {
1981                                 PTEBufferSizeInRequestsForLuma[k] =
1982                                                 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1983                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
1984                         } else {
1985                                 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1986                                 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
1987                         }
1988
1989                         PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1990                                         myPipe[k].ViewportStationary,
1991                                         myPipe[k].DCCEnable,
1992                                         myPipe[k].DPPPerSurface,
1993                                         myPipe[k].BlockHeight256BytesC,
1994                                         myPipe[k].BlockWidth256BytesC,
1995                                         myPipe[k].SourcePixelFormat,
1996                                         myPipe[k].SurfaceTiling,
1997                                         myPipe[k].BytePerPixelC,
1998                                         myPipe[k].SourceRotation,
1999                                         SwathWidthC[k],
2000                                         myPipe[k].ViewportHeightChroma,
2001                                         myPipe[k].ViewportXStartC,
2002                                         myPipe[k].ViewportYStartC,
2003                                         GPUVMEnable,
2004                                         HostVMEnable,
2005                                         HostVMMaxNonCachedPageTableLevels,
2006                                         GPUVMMaxPageTableLevels,
2007                                         GPUVMMinPageSizeKBytes[k],
2008                                         HostVMMinPageSize,
2009                                         PTEBufferSizeInRequestsForChroma[k],
2010                                         myPipe[k].PitchC,
2011                                         myPipe[k].DCCMetaPitchC,
2012                                         myPipe[k].BlockWidthC,
2013                                         myPipe[k].BlockHeightC,
2014
2015                                         /* Output */
2016                                         &MetaRowByteC[k],
2017                                         &PixelPTEBytesPerRowC[k],
2018                                         &dpte_row_width_chroma_ub[k],
2019                                         &dpte_row_height_chroma[k],
2020                                         &dpte_row_height_linear_chroma[k],
2021                                         &PixelPTEBytesPerRowC_one_row_per_frame[k],
2022                                         &dpte_row_width_chroma_ub_one_row_per_frame[k],
2023                                         &dpte_row_height_chroma_one_row_per_frame[k],
2024                                         &meta_req_width_chroma[k],
2025                                         &meta_req_height_chroma[k],
2026                                         &meta_row_width_chroma[k],
2027                                         &meta_row_height_chroma[k],
2028                                         &PixelPTEReqWidthC[k],
2029                                         &PixelPTEReqHeightC[k],
2030                                         &PTERequestSizeC[k],
2031                                         &dpde0_bytes_per_frame_ub_c[k],
2032                                         &meta_pte_bytes_per_frame_ub_c[k]);
2033
2034                         PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2035                                         myPipe[k].VRatioChroma,
2036                                         myPipe[k].VTapsChroma,
2037                                         myPipe[k].InterlaceEnable,
2038                                         myPipe[k].ProgressiveToInterlaceUnitInOPP,
2039                                         myPipe[k].SwathHeightC,
2040                                         myPipe[k].SourceRotation,
2041                                         myPipe[k].ViewportStationary,
2042                                         SwathWidthC[k],
2043                                         myPipe[k].ViewportHeightChroma,
2044                                         myPipe[k].ViewportXStartC,
2045                                         myPipe[k].ViewportYStartC,
2046
2047                                         /* Output */
2048                                         &VInitPreFillC[k],
2049                                         &MaxNumSwathC[k]);
2050                 } else {
2051                         PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2052                         PTEBufferSizeInRequestsForChroma[k] = 0;
2053                         PixelPTEBytesPerRowC[k] = 0;
2054                         PDEAndMetaPTEBytesFrameC = 0;
2055                         MetaRowByteC[k] = 0;
2056                         MaxNumSwathC[k] = 0;
2057                         PrefetchSourceLinesC[k] = 0;
2058                         dpte_row_height_chroma_one_row_per_frame[k] = 0;
2059                         dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2060                         PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2061                 }
2062
2063                 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2064                                 myPipe[k].ViewportStationary,
2065                                 myPipe[k].DCCEnable,
2066                                 myPipe[k].DPPPerSurface,
2067                                 myPipe[k].BlockHeight256BytesY,
2068                                 myPipe[k].BlockWidth256BytesY,
2069                                 myPipe[k].SourcePixelFormat,
2070                                 myPipe[k].SurfaceTiling,
2071                                 myPipe[k].BytePerPixelY,
2072                                 myPipe[k].SourceRotation,
2073                                 SwathWidthY[k],
2074                                 myPipe[k].ViewportHeight,
2075                                 myPipe[k].ViewportXStart,
2076                                 myPipe[k].ViewportYStart,
2077                                 GPUVMEnable,
2078                                 HostVMEnable,
2079                                 HostVMMaxNonCachedPageTableLevels,
2080                                 GPUVMMaxPageTableLevels,
2081                                 GPUVMMinPageSizeKBytes[k],
2082                                 HostVMMinPageSize,
2083                                 PTEBufferSizeInRequestsForLuma[k],
2084                                 myPipe[k].PitchY,
2085                                 myPipe[k].DCCMetaPitchY,
2086                                 myPipe[k].BlockWidthY,
2087                                 myPipe[k].BlockHeightY,
2088
2089                                 /* Output */
2090                                 &MetaRowByteY[k],
2091                                 &PixelPTEBytesPerRowY[k],
2092                                 &dpte_row_width_luma_ub[k],
2093                                 &dpte_row_height_luma[k],
2094                                 &dpte_row_height_linear_luma[k],
2095                                 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2096                                 &dpte_row_width_luma_ub_one_row_per_frame[k],
2097                                 &dpte_row_height_luma_one_row_per_frame[k],
2098                                 &meta_req_width[k],
2099                                 &meta_req_height[k],
2100                                 &meta_row_width[k],
2101                                 &meta_row_height[k],
2102                                 &PixelPTEReqWidthY[k],
2103                                 &PixelPTEReqHeightY[k],
2104                                 &PTERequestSizeY[k],
2105                                 &dpde0_bytes_per_frame_ub_l[k],
2106                                 &meta_pte_bytes_per_frame_ub_l[k]);
2107
2108                 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2109                                 myPipe[k].VRatio,
2110                                 myPipe[k].VTaps,
2111                                 myPipe[k].InterlaceEnable,
2112                                 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2113                                 myPipe[k].SwathHeightY,
2114                                 myPipe[k].SourceRotation,
2115                                 myPipe[k].ViewportStationary,
2116                                 SwathWidthY[k],
2117                                 myPipe[k].ViewportHeight,
2118                                 myPipe[k].ViewportXStart,
2119                                 myPipe[k].ViewportYStart,
2120
2121                                 /* Output */
2122                                 &VInitPreFillY[k],
2123                                 &MaxNumSwathY[k]);
2124
2125                 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2126                 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2127
2128                 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2129                                 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2130                         PTEBufferSizeNotExceeded[k] = true;
2131                 } else {
2132                         PTEBufferSizeNotExceeded[k] = false;
2133                 }
2134
2135                 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2136                         PTEBufferSizeInRequestsForLuma[k] &&
2137                         PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2138         }
2139
2140         dml32_CalculateMALLUseForStaticScreen(
2141                         NumberOfActiveSurfaces,
2142                         MALLAllocatedForDCN,
2143                         UseMALLForStaticScreen,   // mode
2144                         SurfaceSizeInMALL,
2145                         one_row_per_frame_fits_in_buffer,
2146                         /* Output */
2147                         UsesMALLForStaticScreen); // boolen
2148
2149         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2150                 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2151                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2152                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2153                                 (GPUVMMinPageSizeKBytes[k] > 64);
2154                 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2155         }
2156
2157         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2158 #ifdef __DML_VBA_DEBUG__
2159                 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2160                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2161 #endif
2162                 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2163                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2164                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2165                                 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2166
2167                 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2168                                 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2169
2170                 if (use_one_row_for_frame[k]) {
2171                         dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2172                         dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2173                         PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2174                         dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2175                         dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2176                         PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2177                         PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2178                 }
2179
2180                 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2181                         DCCMetaBufferSizeNotExceeded[k] = true;
2182                 else
2183                         DCCMetaBufferSizeNotExceeded[k] = false;
2184
2185                 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2186                 if (use_one_row_for_frame[k])
2187                         PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2188
2189                 dml32_CalculateRowBandwidth(
2190                                 GPUVMEnable,
2191                                 myPipe[k].SourcePixelFormat,
2192                                 myPipe[k].VRatio,
2193                                 myPipe[k].VRatioChroma,
2194                                 myPipe[k].DCCEnable,
2195                                 myPipe[k].HTotal / myPipe[k].PixelClock,
2196                                 MetaRowByteY[k], MetaRowByteC[k],
2197                                 meta_row_height[k],
2198                                 meta_row_height_chroma[k],
2199                                 PixelPTEBytesPerRowY[k],
2200                                 PixelPTEBytesPerRowC[k],
2201                                 dpte_row_height_luma[k],
2202                                 dpte_row_height_chroma[k],
2203
2204                                 /* Output */
2205                                 &meta_row_bw[k],
2206                                 &dpte_row_bw[k]);
2207 #ifdef __DML_VBA_DEBUG__
2208                 dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2209                 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2210                                 __func__, k, use_one_row_for_frame_flip[k]);
2211                 dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2212                                 __func__, k, UseMALLForPStateChange[k]);
2213                 dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2214                 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2215                                 __func__, k, dpte_row_width_luma_ub[k]);
2216                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2217                 dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2218                                 __func__, k, dpte_row_height_chroma[k]);
2219                 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2220                                 __func__, k, dpte_row_width_chroma_ub[k]);
2221                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2222                 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2223                 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2224                                 __func__, k, PTEBufferSizeNotExceeded[k]);
2225                 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2226                 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2227 #endif
2228         }
2229 } // CalculateVMRowAndSwath
2230
2231 unsigned int dml32_CalculateVMAndRowBytes(
2232                 bool ViewportStationary,
2233                 bool DCCEnable,
2234                 unsigned int NumberOfDPPs,
2235                 unsigned int BlockHeight256Bytes,
2236                 unsigned int BlockWidth256Bytes,
2237                 enum source_format_class SourcePixelFormat,
2238                 unsigned int SurfaceTiling,
2239                 unsigned int BytePerPixel,
2240                 enum dm_rotation_angle SourceRotation,
2241                 double SwathWidth,
2242                 unsigned int ViewportHeight,
2243                 unsigned int    ViewportXStart,
2244                 unsigned int    ViewportYStart,
2245                 bool GPUVMEnable,
2246                 bool HostVMEnable,
2247                 unsigned int HostVMMaxNonCachedPageTableLevels,
2248                 unsigned int GPUVMMaxPageTableLevels,
2249                 unsigned int GPUVMMinPageSizeKBytes,
2250                 unsigned int HostVMMinPageSize,
2251                 unsigned int PTEBufferSizeInRequests,
2252                 unsigned int Pitch,
2253                 unsigned int DCCMetaPitch,
2254                 unsigned int MacroTileWidth,
2255                 unsigned int MacroTileHeight,
2256
2257                 /* Output */
2258                 unsigned int *MetaRowByte,
2259                 unsigned int *PixelPTEBytesPerRow,
2260                 unsigned int    *dpte_row_width_ub,
2261                 unsigned int *dpte_row_height,
2262                 unsigned int *dpte_row_height_linear,
2263                 unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2264                 unsigned int    *dpte_row_width_ub_one_row_per_frame,
2265                 unsigned int    *dpte_row_height_one_row_per_frame,
2266                 unsigned int *MetaRequestWidth,
2267                 unsigned int *MetaRequestHeight,
2268                 unsigned int *meta_row_width,
2269                 unsigned int *meta_row_height,
2270                 unsigned int *PixelPTEReqWidth,
2271                 unsigned int *PixelPTEReqHeight,
2272                 unsigned int *PTERequestSize,
2273                 unsigned int    *DPDE0BytesFrame,
2274                 unsigned int    *MetaPTEBytesFrame)
2275 {
2276         unsigned int MPDEBytesFrame;
2277         unsigned int DCCMetaSurfaceBytes;
2278         unsigned int ExtraDPDEBytesFrame;
2279         unsigned int PDEAndMetaPTEBytesFrame;
2280         unsigned int HostVMDynamicLevels = 0;
2281         unsigned int    MacroTileSizeBytes;
2282         unsigned int    vp_height_meta_ub;
2283         unsigned int    vp_height_dpte_ub;
2284         unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2285
2286         if (GPUVMEnable == true && HostVMEnable == true) {
2287                 if (HostVMMinPageSize < 2048)
2288                         HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2289                 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2290                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2291                 else
2292                         HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2293         }
2294
2295         *MetaRequestHeight = 8 * BlockHeight256Bytes;
2296         *MetaRequestWidth = 8 * BlockWidth256Bytes;
2297         if (SurfaceTiling == dm_sw_linear) {
2298                 *meta_row_height = 32;
2299                 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2300                                 - dml_floor(ViewportXStart, *MetaRequestWidth);
2301         } else if (!IsVertical(SourceRotation)) {
2302                 *meta_row_height = *MetaRequestHeight;
2303                 if (ViewportStationary && NumberOfDPPs == 1) {
2304                         *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2305                                         *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2306                 } else {
2307                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2308                 }
2309                 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2310         } else {
2311                 *meta_row_height = *MetaRequestWidth;
2312                 if (ViewportStationary && NumberOfDPPs == 1) {
2313                         *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2314                                         *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2315                 } else {
2316                         *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2317                 }
2318                 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2319         }
2320
2321         if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2322                 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2323                                 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2324         } else if (!IsVertical(SourceRotation)) {
2325                 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2326         } else {
2327                 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2328         }
2329
2330         DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2331
2332         if (GPUVMEnable == true) {
2333                 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2334                                 (8 * 4.0 * 1024), 1) + 1) * 64;
2335                 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2336         } else {
2337                 *MetaPTEBytesFrame = 0;
2338                 MPDEBytesFrame = 0;
2339         }
2340
2341         if (DCCEnable != true) {
2342                 *MetaPTEBytesFrame = 0;
2343                 MPDEBytesFrame = 0;
2344                 *MetaRowByte = 0;
2345         }
2346
2347         MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2348
2349         if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2350                 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351                         vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2352                                         MacroTileHeight - 1, MacroTileHeight) -
2353                                         dml_floor(ViewportYStart, MacroTileHeight);
2354                 } else if (!IsVertical(SourceRotation)) {
2355                         vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2356                 } else {
2357                         vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2358                 }
2359                 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2360                                 (8 * 2097152), 1) + 1);
2361                 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2362         } else {
2363                 *DPDE0BytesFrame = 0;
2364                 ExtraDPDEBytesFrame = 0;
2365                 vp_height_dpte_ub = 0;
2366         }
2367
2368         PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2369
2370 #ifdef __DML_VBA_DEBUG__
2371         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2372         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2373         dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2374         dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2375         dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2376         dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2377         dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2378         dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2379         dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2380         dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2381         dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2382         dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2383         dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2384         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2385         dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2386         dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2387         dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2388 #endif
2389
2390         if (HostVMEnable == true)
2391                 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2392
2393         if (SurfaceTiling == dm_sw_linear) {
2394                 *PixelPTEReqHeight = 1;
2395                 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2396                 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2397                 *PTERequestSize = 64;
2398         } else if (GPUVMMinPageSizeKBytes == 4) {
2399                 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2400                 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2401                 *PTERequestSize = 128;
2402         } else {
2403                 *PixelPTEReqHeight = MacroTileHeight;
2404                 *PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2405                 *PTERequestSize = 64;
2406         }
2407 #ifdef __DML_VBA_DEBUG__
2408         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2409         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2410         dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2411         dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2412         dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2413         dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2414         dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2415 #endif
2416
2417         *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2418         *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2419                         (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2420                                         (double) *PixelPTEReqWidth;
2421         *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2422                         *PTERequestSize;
2423
2424         if (SurfaceTiling == dm_sw_linear) {
2425                 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2426                                 *PixelPTEReqWidth / Pitch), 1));
2427 #ifdef __DML_VBA_DEBUG__
2428                 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2429                                 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2430                 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2431                                 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2432                 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2433                                 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2434                 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2435                                 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2436                                                 *PixelPTEReqWidth / Pitch), 1));
2437                 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2438 #endif
2439                 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2440                                 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2441                 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2442
2443                 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2444                 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2445                                 PixelPTEReqWidth_linear / Pitch), 1);
2446                 if (*dpte_row_height_linear > 128)
2447                         *dpte_row_height_linear = 128;
2448
2449         } else if (!IsVertical(SourceRotation)) {
2450                 *dpte_row_height = *PixelPTEReqHeight;
2451
2452                 if (GPUVMMinPageSizeKBytes > 64) {
2453                         *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2454                                         *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2455                 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2456                         *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2457                                         *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2458                                         dml_floor(ViewportXStart, *PixelPTEReqWidth);
2459                 } else {
2460                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2461                                         *PixelPTEReqWidth;
2462                 }
2463
2464                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2465         } else {
2466                 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2467
2468                 if (ViewportStationary && (NumberOfDPPs == 1)) {
2469                         *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2470                                         *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2471                 } else {
2472                         *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2473                                         * *PixelPTEReqHeight;
2474                 }
2475
2476                 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2477         }
2478
2479         if (GPUVMEnable != true)
2480                 *PixelPTEBytesPerRow = 0;
2481         if (HostVMEnable == true)
2482                 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2483
2484 #ifdef __DML_VBA_DEBUG__
2485         dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2486         dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2487         dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2488         dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2489         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2490         dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2491         dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2492         dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2493                         __func__, *dpte_row_width_ub_one_row_per_frame);
2494         dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2495                         __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2496         dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2497                         *MetaPTEBytesFrame);
2498 #endif
2499
2500         return PDEAndMetaPTEBytesFrame;
2501 } // CalculateVMAndRowBytes
2502
2503 double dml32_CalculatePrefetchSourceLines(
2504                 double VRatio,
2505                 unsigned int VTaps,
2506                 bool Interlace,
2507                 bool ProgressiveToInterlaceUnitInOPP,
2508                 unsigned int SwathHeight,
2509                 enum dm_rotation_angle SourceRotation,
2510                 bool ViewportStationary,
2511                 double SwathWidth,
2512                 unsigned int ViewportHeight,
2513                 unsigned int ViewportXStart,
2514                 unsigned int ViewportYStart,
2515
2516                 /* Output */
2517                 double *VInitPreFill,
2518                 unsigned int *MaxNumSwath)
2519 {
2520
2521         unsigned int vp_start_rot;
2522         unsigned int sw0_tmp;
2523         unsigned int MaxPartialSwath;
2524         double numLines;
2525
2526 #ifdef __DML_VBA_DEBUG__
2527         dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2528         dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2529         dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2530         dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2531         dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2532         dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2533 #endif
2534         if (ProgressiveToInterlaceUnitInOPP)
2535                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2536         else
2537                 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2538
2539         if (ViewportStationary) {
2540                 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2541                         vp_start_rot = SwathHeight -
2542                                         (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2543                 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2544                         vp_start_rot = ViewportXStart;
2545                 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2546                         vp_start_rot = SwathHeight -
2547                                         (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2548                 } else {
2549                         vp_start_rot = ViewportYStart;
2550                 }
2551                 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2552                 if (sw0_tmp < *VInitPreFill)
2553                         *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2554                 else
2555                         *MaxNumSwath = 1;
2556                 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2557         } else {
2558                 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2559                 if (*VInitPreFill > 1)
2560                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2561                 else
2562                         MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2563         }
2564         numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2565
2566 #ifdef __DML_VBA_DEBUG__
2567         dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2568         dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2569         dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2570         dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2571         dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2572 #endif
2573         return numLines;
2574
2575 } // CalculatePrefetchSourceLines
2576
2577 void dml32_CalculateMALLUseForStaticScreen(
2578                 unsigned int NumberOfActiveSurfaces,
2579                 unsigned int MALLAllocatedForDCNFinal,
2580                 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2581                 unsigned int SurfaceSizeInMALL[],
2582                 bool one_row_per_frame_fits_in_buffer[],
2583
2584                 /* output */
2585                 bool UsesMALLForStaticScreen[])
2586 {
2587         unsigned int k;
2588         unsigned int SurfaceToAddToMALL;
2589         bool CanAddAnotherSurfaceToMALL;
2590         unsigned int TotalSurfaceSizeInMALL;
2591
2592         TotalSurfaceSizeInMALL = 0;
2593         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2594                 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2595                 if (UsesMALLForStaticScreen[k])
2596                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2597 #ifdef __DML_VBA_DEBUG__
2598                 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2599                 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2600 #endif
2601         }
2602
2603         SurfaceToAddToMALL = 0;
2604         CanAddAnotherSurfaceToMALL = true;
2605         while (CanAddAnotherSurfaceToMALL) {
2606                 CanAddAnotherSurfaceToMALL = false;
2607                 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2608                         if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2609                                         !UsesMALLForStaticScreen[k] &&
2610                                         UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2611                                         one_row_per_frame_fits_in_buffer[k] &&
2612                                         (!CanAddAnotherSurfaceToMALL ||
2613                                         SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2614                                 CanAddAnotherSurfaceToMALL = true;
2615                                 SurfaceToAddToMALL = k;
2616 #ifdef __DML_VBA_DEBUG__
2617                                 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2618                                                 __func__, k, UseMALLForStaticScreen[k]);
2619 #endif
2620                         }
2621                 }
2622                 if (CanAddAnotherSurfaceToMALL) {
2623                         UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2624                         TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2625
2626 #ifdef __DML_VBA_DEBUG__
2627                         dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2628                         dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2629 #endif
2630
2631                 }
2632         }
2633 }
2634
2635 void dml32_CalculateRowBandwidth(
2636                 bool GPUVMEnable,
2637                 enum source_format_class SourcePixelFormat,
2638                 double VRatio,
2639                 double VRatioChroma,
2640                 bool DCCEnable,
2641                 double LineTime,
2642                 unsigned int MetaRowByteLuma,
2643                 unsigned int MetaRowByteChroma,
2644                 unsigned int meta_row_height_luma,
2645                 unsigned int meta_row_height_chroma,
2646                 unsigned int PixelPTEBytesPerRowLuma,
2647                 unsigned int PixelPTEBytesPerRowChroma,
2648                 unsigned int dpte_row_height_luma,
2649                 unsigned int dpte_row_height_chroma,
2650                 /* Output */
2651                 double *meta_row_bw,
2652                 double *dpte_row_bw)
2653 {
2654         if (DCCEnable != true) {
2655                 *meta_row_bw = 0;
2656         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2657                         SourcePixelFormat == dm_rgbe_alpha) {
2658                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2659                                 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2660         } else {
2661                 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2662         }
2663
2664         if (GPUVMEnable != true) {
2665                 *dpte_row_bw = 0;
2666         } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2667                         SourcePixelFormat == dm_rgbe_alpha) {
2668                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2669                                 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2670         } else {
2671                 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2672         }
2673 }
2674
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * optionally increased by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * when DoUrgentLatencyAdjustment is set.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	WorstLatency = WorstLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	return WorstLatency;
}
2693
2694 void dml32_CalculateUrgentBurstFactor(
2695                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2696                 unsigned int    swath_width_luma_ub,
2697                 unsigned int    swath_width_chroma_ub,
2698                 unsigned int SwathHeightY,
2699                 unsigned int SwathHeightC,
2700                 double  LineTime,
2701                 double  UrgentLatency,
2702                 double  CursorBufferSize,
2703                 unsigned int CursorWidth,
2704                 unsigned int CursorBPP,
2705                 double  VRatio,
2706                 double  VRatioC,
2707                 double  BytePerPixelInDETY,
2708                 double  BytePerPixelInDETC,
2709                 unsigned int    DETBufferSizeY,
2710                 unsigned int    DETBufferSizeC,
2711                 /* Output */
2712                 double *UrgentBurstFactorCursor,
2713                 double *UrgentBurstFactorLuma,
2714                 double *UrgentBurstFactorChroma,
2715                 bool   *NotEnoughUrgentLatencyHiding)
2716 {
2717         double       LinesInDETLuma;
2718         double       LinesInDETChroma;
2719         unsigned int LinesInCursorBuffer;
2720         double       CursorBufferSizeInTime;
2721         double       DETBufferSizeInTimeLuma;
2722         double       DETBufferSizeInTimeChroma;
2723
2724         *NotEnoughUrgentLatencyHiding = 0;
2725
2726         if (CursorWidth > 0) {
2727                 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2728                                 (CursorWidth * CursorBPP / 8.0)), 1.0);
2729                 if (VRatio > 0) {
2730                         CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2731                         if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2732                                 *NotEnoughUrgentLatencyHiding = 1;
2733                                 *UrgentBurstFactorCursor = 0;
2734                         } else {
2735                                 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2736                                                 (CursorBufferSizeInTime - UrgentLatency);
2737                         }
2738                 } else {
2739                         *UrgentBurstFactorCursor = 1;
2740                 }
2741         }
2742
2743         LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2744                         DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2745
2746         if (VRatio > 0) {
2747                 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2748                 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2749                         *NotEnoughUrgentLatencyHiding = 1;
2750                         *UrgentBurstFactorLuma = 0;
2751                 } else {
2752                         *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2753                 }
2754         } else {
2755                 *UrgentBurstFactorLuma = 1;
2756         }
2757
2758         if (BytePerPixelInDETC > 0) {
2759                 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2760                                         1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2761                                         / swath_width_chroma_ub;
2762
2763                 if (VRatio > 0) {
2764                         DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2765                         if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2766                                 *NotEnoughUrgentLatencyHiding = 1;
2767                                 *UrgentBurstFactorChroma = 0;
2768                         } else {
2769                                 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2770                                                 / (DETBufferSizeInTimeChroma - UrgentLatency);
2771                         }
2772                 } else {
2773                         *UrgentBurstFactorChroma = 1;
2774                 }
2775         }
2776 } // CalculateUrgentBurstFactor
2777
2778 void dml32_CalculateDCFCLKDeepSleep(
2779                 unsigned int NumberOfActiveSurfaces,
2780                 unsigned int BytePerPixelY[],
2781                 unsigned int BytePerPixelC[],
2782                 double VRatio[],
2783                 double VRatioChroma[],
2784                 double SwathWidthY[],
2785                 double SwathWidthC[],
2786                 unsigned int DPPPerSurface[],
2787                 double HRatio[],
2788                 double HRatioChroma[],
2789                 double PixelClock[],
2790                 double PSCL_THROUGHPUT[],
2791                 double PSCL_THROUGHPUT_CHROMA[],
2792                 double Dppclk[],
2793                 double ReadBandwidthLuma[],
2794                 double ReadBandwidthChroma[],
2795                 unsigned int ReturnBusWidth,
2796
2797                 /* Output */
2798                 double *DCFClkDeepSleep)
2799 {
2800         unsigned int k;
2801         double   DisplayPipeLineDeliveryTimeLuma;
2802         double   DisplayPipeLineDeliveryTimeChroma;
2803         double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2804         double ReadBandwidth = 0.0;
2805
2806         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2807
2808                 if (VRatio[k] <= 1) {
2809                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2810                                         / PixelClock[k];
2811                 } else {
2812                         DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2813                 }
2814                 if (BytePerPixelC[k] == 0) {
2815                         DisplayPipeLineDeliveryTimeChroma = 0;
2816                 } else {
2817                         if (VRatioChroma[k] <= 1) {
2818                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2819                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2820                         } else {
2821                                 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2822                                                 / Dppclk[k];
2823                         }
2824                 }
2825
2826                 if (BytePerPixelC[k] > 0) {
2827                         DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2828                                         BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2829                                         __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2830                                         32.0 / DisplayPipeLineDeliveryTimeChroma);
2831                 } else {
2832                         DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2833                                         64.0 / DisplayPipeLineDeliveryTimeLuma;
2834                 }
2835                 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2836
2837 #ifdef __DML_VBA_DEBUG__
2838                 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2839                 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2840 #endif
2841         }
2842
2843         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2844                 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2845
2846         *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2847
2848 #ifdef __DML_VBA_DEBUG__
2849         dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2850         dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2851         dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2852         dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2853 #endif
2854
2855         for (k = 0; k < NumberOfActiveSurfaces; ++k)
2856                 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2857 #ifdef __DML_VBA_DEBUG__
2858         dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2859 #endif
2860 } // CalculateDCFCLKDeepSleep
2861
2862 double dml32_CalculateWriteBackDelay(
2863                 enum source_format_class WritebackPixelFormat,
2864                 double WritebackHRatio,
2865                 double WritebackVRatio,
2866                 unsigned int WritebackVTaps,
2867                 unsigned int         WritebackDestinationWidth,
2868                 unsigned int         WritebackDestinationHeight,
2869                 unsigned int         WritebackSourceHeight,
2870                 unsigned int HTotal)
2871 {
2872         double CalculateWriteBackDelay;
2873         double Line_length;
2874         double Output_lines_last_notclamped;
2875         double WritebackVInit;
2876
2877         WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2878         Line_length = dml_max((double) WritebackDestinationWidth,
2879                         dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2880         Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2881                         dml_ceil(((double)WritebackSourceHeight -
2882                                         (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2883         if (Output_lines_last_notclamped < 0) {
2884                 CalculateWriteBackDelay = 0;
2885         } else {
2886                 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2887                                 (HTotal - WritebackDestinationWidth) + 80;
2888         }
2889         return CalculateWriteBackDelay;
2890 }
2891
2892 void dml32_UseMinimumDCFCLK(
2893                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2894                 bool DRRDisplay[],
2895                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2896                 unsigned int MaxInterDCNTileRepeaters,
2897                 unsigned int MaxPrefetchMode,
2898                 double DRAMClockChangeLatencyFinal,
2899                 double FCLKChangeLatency,
2900                 double SREnterPlusExitTime,
2901                 unsigned int ReturnBusWidth,
2902                 unsigned int RoundTripPingLatencyCycles,
2903                 unsigned int ReorderingBytes,
2904                 unsigned int PixelChunkSizeInKByte,
2905                 unsigned int MetaChunkSize,
2906                 bool GPUVMEnable,
2907                 unsigned int GPUVMMaxPageTableLevels,
2908                 bool HostVMEnable,
2909                 unsigned int NumberOfActiveSurfaces,
2910                 double HostVMMinPageSize,
2911                 unsigned int HostVMMaxNonCachedPageTableLevels,
2912                 bool DynamicMetadataVMEnabled,
2913                 bool ImmediateFlipRequirement,
2914                 bool ProgressiveToInterlaceUnitInOPP,
2915                 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2916                 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2917                 unsigned int VTotal[],
2918                 unsigned int VActive[],
2919                 unsigned int DynamicMetadataTransmittedBytes[],
2920                 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2921                 bool Interlace[],
2922                 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2923                 double RequiredDISPCLK[][2],
2924                 double UrgLatency[],
2925                 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2926                 double ProjectedDCFClkDeepSleep[][2],
2927                 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2928                 unsigned int TotalNumberOfActiveDPP[][2],
2929                 unsigned int TotalNumberOfDCCActiveDPP[][2],
2930                 unsigned int dpte_group_bytes[],
2931                 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2932                 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2933                 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2934                 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2935                 unsigned int BytePerPixelY[],
2936                 unsigned int BytePerPixelC[],
2937                 unsigned int HTotal[],
2938                 double PixelClock[],
2939                 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2940                 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2941                 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2942                 bool DynamicMetadataEnable[],
2943                 double ReadBandwidthLuma[],
2944                 double ReadBandwidthChroma[],
2945                 double DCFCLKPerState[],
2946                 /* Output */
2947                 double DCFCLKState[][2])
2948 {
2949         unsigned int i, j, k;
2950         unsigned int     dummy1;
2951         double dummy2, dummy3;
2952         double   NormalEfficiency;
2953         double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2954
2955         NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2956         for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2957                 for  (j = 0; j <= 1; ++j) {
2958                         double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2959                         double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2960                         double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2961                         double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2962                         double MinimumTWait = 0.0;
2963                         double DPTEBandwidth;
2964                         double DCFCLKRequiredForAverageBandwidth;
2965                         unsigned int ExtraLatencyBytes;
2966                         double ExtraLatencyCycles;
2967                         double DCFCLKRequiredForPeakBandwidth;
2968                         unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2969                         double MinimumTvmPlus2Tr0;
2970
2971                         TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2972                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2973                                 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2974                                                 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2975                                                                 / (15.75 * HTotal[k] / PixelClock[k]);
2976                         }
2977
2978                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2979                                 NoOfDPPState[k] = NoOfDPP[i][j][k];
2980
2981                         DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2982                         DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2983
2984                         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2985                                         TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2986                                         TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2987                                         NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2988                                         HostVMMaxNonCachedPageTableLevels);
2989                         ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2990                                         + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2991                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2992                                 double DCFCLKCyclesRequiredInPrefetch;
2993                                 double PrefetchTime;
2994
2995                                 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2996                                                 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2997                                                 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2998                                                                 * BytePerPixelC[k]) / NormalEfficiency
2999                                                 / ReturnBusWidth;
3000                                 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3001                                                 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3002                                                                 / NormalEfficiency / ReturnBusWidth
3003                                                                 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3004                                                 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3005                                                                 / ReturnBusWidth
3006                                                 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3007                                                 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3008                                 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3009                                                 * HTotal[k] / PixelClock[k];
3010                                 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3011                                                 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3012                                                 UrgLatency[i] * GPUVMMaxPageTableLevels *
3013                                                 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3014
3015                                 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3016                                                 UseMALLForPStateChange[k],
3017                                                 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3018                                                 DRRDisplay[k],
3019                                                 DRAMClockChangeLatencyFinal,
3020                                                 FCLKChangeLatency,
3021                                                 UrgLatency[i],
3022                                                 SREnterPlusExitTime);
3023
3024                                 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3025                                                 MinimumTWait - UrgLatency[i] *
3026                                                 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3027                                                 GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3028                                                 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3029                                                 DynamicMetadataVMExtraLatency[k];
3030
3031                                 if (PrefetchTime > 0) {
3032                                         double ExpectedVRatioPrefetch;
3033
3034                                         ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3035                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3036                                                         DCFCLKCyclesRequiredInPrefetch);
3037                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3038                                                         PixelDCFCLKCyclesRequiredInPrefetch[k] /
3039                                                         PrefetchPixelLinesTime[k] *
3040                                                         dml_max(1.0, ExpectedVRatioPrefetch) *
3041                                                         dml_max(1.0, ExpectedVRatioPrefetch / 4);
3042                                         if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3043                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3044                                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3045                                                                 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3046                                                                 NormalEfficiency / ReturnBusWidth;
3047                                         }
3048                                 } else {
3049                                         DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3050                                 }
3051                                 if (DynamicMetadataEnable[k] == true) {
3052                                         double TSetupPipe;
3053                                         double TdmbfPipe;
3054                                         double TdmsksPipe;
3055                                         double TdmecPipe;
3056                                         double AllowedTimeForUrgentExtraLatency;
3057
3058                                         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3059                                                         MaxInterDCNTileRepeaters,
3060                                                         RequiredDPPCLKPerSurface[i][j][k],
3061                                                         RequiredDISPCLK[i][j],
3062                                                         ProjectedDCFClkDeepSleep[i][j],
3063                                                         PixelClock[k],
3064                                                         HTotal[k],
3065                                                         VTotal[k] - VActive[k],
3066                                                         DynamicMetadataTransmittedBytes[k],
3067                                                         DynamicMetadataLinesBeforeActiveRequired[k],
3068                                                         Interlace[k],
3069                                                         ProgressiveToInterlaceUnitInOPP,
3070
3071                                                         /* output */
3072                                                         &TSetupPipe,
3073                                                         &TdmbfPipe,
3074                                                         &TdmecPipe,
3075                                                         &TdmsksPipe,
3076                                                         &dummy1,
3077                                                         &dummy2,
3078                                                         &dummy3);
3079                                         AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3080                                                         PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3081                                                         TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3082                                         if (AllowedTimeForUrgentExtraLatency > 0)
3083                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3084                                                                 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3085                                                                 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3086                                         else
3087                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3088                                 }
3089                         }
3090                         DCFCLKRequiredForPeakBandwidth = 0;
3091                         for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3092                                 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3093                                                 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3094                         }
3095                         MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3096                                         (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3097                                         (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3098                         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3099                                 double MaximumTvmPlus2Tr0PlusTsw;
3100
3101                                 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3102                                                 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3103                                 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3104                                         DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3105                                 } else {
3106                                         DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3107                                                         2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3108                                                                 MinimumTvmPlus2Tr0 -
3109                                                                 PrefetchPixelLinesTime[k] / 4),
3110                                                         (2 * ExtraLatencyCycles +
3111                                                                 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3112                                                                 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3113                                 }
3114                         }
3115                         DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3116                                         dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3117                 }
3118         }
3119 }
3120
/*
 * Add up the byte count used for the extra-latency term.
 *
 * The total is ReorderingBytes plus the pixel chunks of all active DPPs and
 * the meta chunks of all DCC-active DPPs (the sum of both is multiplied by
 * 1024.0).  When GPUVM is enabled, every active surface additionally
 * contributes NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) scaled
 * by HostVMInefficiencyFactor, where "levels" is the number of host VM
 * levels selected from HostVMMinPageSize below.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int HostVMDynamicLevels = 0;
	unsigned int PerGroupFactor;
	unsigned int Surface;
	double TotalBytes;

	/* Host VM levels only count when both GPUVM and HostVM are enabled. */
	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no dpte group contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	PerGroupFactor = 1 + 8 * HostVMDynamicLevels;
	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		TotalBytes += NumberOfDPP[Surface] * dpte_group_bytes[Surface] *
				PerGroupFactor * HostVMInefficiencyFactor;
	}

	return TotalBytes;
}
3161
/*
 * Derive the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * terms for one pipe.
 *
 * Outputs, as computed below:
 *   VUpdateWidthPix  - dml_ceil(..., 1.0) of (14 / DCFClkDeepSleep +
 *                      12 / Dppclk + repeater delay) * PixelClock
 *   VReadyOffsetPix  - dml_ceil(..., 1.0) of the larger of 150 / Dppclk and
 *                      (repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk),
 *                      times PixelClock
 *   VUpdateOffsetPix - dml_ceil(HTotal / 4.0, 1.0)
 *   TSetup           - sum of the three pixel counts above / PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4.0 / Dispclk
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - VBlank * HTotal / PixelClock / 2.0 when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that line count * HTotal / PixelClock;
 *                      halved when InterlaceEnable is 1 and
 *                      ProgressiveToInterlaceUnitInOPP is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * No clock argument is checked for zero here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so print them with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3216
3217 double dml32_CalculateTWait(
3218                 unsigned int PrefetchMode,
3219                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3220                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3221                 bool DRRDisplay,
3222                 double DRAMClockChangeLatency,
3223                 double FCLKChangeLatency,
3224                 double UrgentLatency,
3225                 double SREnterPlusExitTime)
3226 {
3227         double TWait = 0.0;
3228
3229         if (PrefetchMode == 0 &&
3230                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3231                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3232                         !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3233                         !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3234                 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3235         } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3236                 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3237         } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3238                 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3239         } else {
3240                 TWait = UrgentLatency;
3241         }
3242
3243 #ifdef __DML_VBA_DEBUG__
3244         dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3245         dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3246 #endif
3247         return TWait;
3248 } // CalculateTWait
3249
3250 // Function: get_return_bw_mbps
3251 // Megabyte per second
3252 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3253                 const int VoltageLevel,
3254                 const bool HostVMEnable,
3255                 const double DCFCLK,
3256                 const double FabricClock,
3257                 const double DRAMSpeed)
3258 {
3259         double ReturnBW = 0.;
3260         double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3261         double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3262         double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3263         double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3264                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3265                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3266                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3267         double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3268                         IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3269                         IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3270                                         soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3271
3272         if (HostVMEnable != true)
3273                 ReturnBW = PixelDataOnlyReturnBW;
3274         else
3275                 ReturnBW = PixelMixedWithVMDataReturnBW;
3276
3277 #ifdef __DML_VBA_DEBUG__
3278         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3279         dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3280         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3281         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3282         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3283         dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3284         dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3285         dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3286         dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3287         dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3288         dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3289 #endif
3290         return ReturnBW;
3291 }
3292
3293 // Function: get_return_bw_mbps_vm_only
3294 // Megabyte per second
3295 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3296                 const int VoltageLevel,
3297                 const double DCFCLK,
3298                 const double FabricClock,
3299                 const double DRAMSpeed)
3300 {
3301         double VMDataOnlyReturnBW = dml_min3(
3302                         soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3303                         FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3304                                         * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3305                         DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3306                                         * (VoltageLevel < 2 ?
3307                                                         soc->pct_ideal_dram_bw_after_urgent_strobe :
3308                                                         soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3309 #ifdef __DML_VBA_DEBUG__
3310         dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3311         dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3312         dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3313         dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3314         dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3315 #endif
3316         return VMDataOnlyReturnBW;
3317 }
3318
3319 double dml32_CalculateExtraLatency(
3320                 unsigned int RoundTripPingLatencyCycles,
3321                 unsigned int ReorderingBytes,
3322                 double DCFCLK,
3323                 unsigned int TotalNumberOfActiveDPP,
3324                 unsigned int PixelChunkSizeInKByte,
3325                 unsigned int TotalNumberOfDCCActiveDPP,
3326                 unsigned int MetaChunkSize,
3327                 double ReturnBW,
3328                 bool GPUVMEnable,
3329                 bool HostVMEnable,
3330                 unsigned int NumberOfActiveSurfaces,
3331                 unsigned int NumberOfDPP[],
3332                 unsigned int dpte_group_bytes[],
3333                 double HostVMInefficiencyFactor,
3334                 double HostVMMinPageSize,
3335                 unsigned int HostVMMaxNonCachedPageTableLevels)
3336 {
3337         double ExtraLatencyBytes;
3338         double ExtraLatency;
3339
3340         ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3341                         ReorderingBytes,
3342                         TotalNumberOfActiveDPP,
3343                         PixelChunkSizeInKByte,
3344                         TotalNumberOfDCCActiveDPP,
3345                         MetaChunkSize,
3346                         GPUVMEnable,
3347                         HostVMEnable,
3348                         NumberOfActiveSurfaces,
3349                         NumberOfDPP,
3350                         dpte_group_bytes,
3351                         HostVMInefficiencyFactor,
3352                         HostVMMinPageSize,
3353                         HostVMMaxNonCachedPageTableLevels);
3354
3355         ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3356
3357 #ifdef __DML_VBA_DEBUG__
3358         dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3359         dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3360         dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3361         dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3362         dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3363 #endif
3364
3365         return ExtraLatency;
3366 } // CalculateExtraLatency
3367
3368 bool dml32_CalculatePrefetchSchedule(
3369                 double HostVMInefficiencyFactor,
3370                 DmlPipe *myPipe,
3371                 unsigned int DSCDelay,
3372                 double DPPCLKDelaySubtotalPlusCNVCFormater,
3373                 double DPPCLKDelaySCL,
3374                 double DPPCLKDelaySCLLBOnly,
3375                 double DPPCLKDelayCNVCCursor,
3376                 double DISPCLKDelaySubtotal,
3377                 unsigned int DPP_RECOUT_WIDTH,
3378                 enum output_format_class OutputFormat,
3379                 unsigned int MaxInterDCNTileRepeaters,
3380                 unsigned int VStartup,
3381                 unsigned int MaxVStartup,
3382                 unsigned int GPUVMPageTableLevels,
3383                 bool GPUVMEnable,
3384                 bool HostVMEnable,
3385                 unsigned int HostVMMaxNonCachedPageTableLevels,
3386                 double HostVMMinPageSize,
3387                 bool DynamicMetadataEnable,
3388                 bool DynamicMetadataVMEnabled,
3389                 int DynamicMetadataLinesBeforeActiveRequired,
3390                 unsigned int DynamicMetadataTransmittedBytes,
3391                 double UrgentLatency,
3392                 double UrgentExtraLatency,
3393                 double TCalc,
3394                 unsigned int PDEAndMetaPTEBytesFrame,
3395                 unsigned int MetaRowByte,
3396                 unsigned int PixelPTEBytesPerRow,
3397                 double PrefetchSourceLinesY,
3398                 unsigned int SwathWidthY,
3399                 unsigned int VInitPreFillY,
3400                 unsigned int MaxNumSwathY,
3401                 double PrefetchSourceLinesC,
3402                 unsigned int SwathWidthC,
3403                 unsigned int VInitPreFillC,
3404                 unsigned int MaxNumSwathC,
3405                 unsigned int swath_width_luma_ub,
3406                 unsigned int swath_width_chroma_ub,
3407                 unsigned int SwathHeightY,
3408                 unsigned int SwathHeightC,
3409                 double TWait,
3410                 /* Output */
3411                 double   *DSTXAfterScaler,
3412                 double   *DSTYAfterScaler,
3413                 double *DestinationLinesForPrefetch,
3414                 double *PrefetchBandwidth,
3415                 double *DestinationLinesToRequestVMInVBlank,
3416                 double *DestinationLinesToRequestRowInVBlank,
3417                 double *VRatioPrefetchY,
3418                 double *VRatioPrefetchC,
3419                 double *RequiredPrefetchPixDataBWLuma,
3420                 double *RequiredPrefetchPixDataBWChroma,
3421                 bool   *NotEnoughTimeForDynamicMetadata,
3422                 double *Tno_bw,
3423                 double *prefetch_vmrow_bw,
3424                 double *Tdmdl_vm,
3425                 double *Tdmdl,
3426                 double *TSetup,
3427                 unsigned int   *VUpdateOffsetPix,
3428                 double   *VUpdateWidthPix,
3429                 double   *VReadyOffsetPix)
3430 {
3431         bool MyError = false;
3432         unsigned int DPPCycles, DISPCLKCycles;
3433         double DSTTotalPixelsAfterScaler;
3434         double LineTime;
3435         double dst_y_prefetch_equ;
3436         double prefetch_bw_oto;
3437         double Tvm_oto;
3438         double Tr0_oto;
3439         double Tvm_oto_lines;
3440         double Tr0_oto_lines;
3441         double dst_y_prefetch_oto;
3442         double TimeForFetchingMetaPTE = 0;
3443         double TimeForFetchingRowInVBlank = 0;
3444         double LinesToRequestPrefetchPixelData = 0;
3445         unsigned int HostVMDynamicLevelsTrips;
3446         double  trip_to_mem;
3447         double  Tvm_trips;
3448         double  Tr0_trips;
3449         double  Tvm_trips_rounded;
3450         double  Tr0_trips_rounded;
3451         double  Lsw_oto;
3452         double  Tpre_rounded;
3453         double  prefetch_bw_equ;
3454         double  Tvm_equ;
3455         double  Tr0_equ;
3456         double  Tdmbf;
3457         double  Tdmec;
3458         double  Tdmsks;
3459         double  prefetch_sw_bytes;
3460         double  bytes_pp;
3461         double  dep_bytes;
3462         unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3463         double  min_Lsw;
3464         double  Tsw_est1 = 0;
3465         double  Tsw_est3 = 0;
3466
3467         if (GPUVMEnable == true && HostVMEnable == true)
3468                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3469         else
3470                 HostVMDynamicLevelsTrips = 0;
3471 #ifdef __DML_VBA_DEBUG__
3472         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3473         dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3474         dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3475         dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3476                         __func__, HostVMEnable, HostVMInefficiencyFactor);
3477 #endif
3478         dml32_CalculateVUpdateAndDynamicMetadataParameters(
3479                         MaxInterDCNTileRepeaters,
3480                         myPipe->Dppclk,
3481                         myPipe->Dispclk,
3482                         myPipe->DCFClkDeepSleep,
3483                         myPipe->PixelClock,
3484                         myPipe->HTotal,
3485                         myPipe->VBlank,
3486                         DynamicMetadataTransmittedBytes,
3487                         DynamicMetadataLinesBeforeActiveRequired,
3488                         myPipe->InterlaceEnable,
3489                         myPipe->ProgressiveToInterlaceUnitInOPP,
3490                         TSetup,
3491
3492                         /* output */
3493                         &Tdmbf,
3494                         &Tdmec,
3495                         &Tdmsks,
3496                         VUpdateOffsetPix,
3497                         VUpdateWidthPix,
3498                         VReadyOffsetPix);
3499
3500         LineTime = myPipe->HTotal / myPipe->PixelClock;
3501         trip_to_mem = UrgentLatency;
3502         Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3503
3504         if (DynamicMetadataVMEnabled == true)
3505                 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3506         else
3507                 *Tdmdl = TWait + UrgentExtraLatency;
3508
3509 #ifdef __DML_VBA_ALLOW_DELTA__
3510         if (DynamicMetadataEnable == false)
3511                 *Tdmdl = 0.0;
3512 #endif
3513
3514         if (DynamicMetadataEnable == true) {
3515                 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3516                         *NotEnoughTimeForDynamicMetadata = true;
3517 #ifdef __DML_VBA_DEBUG__
3518                         dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3519                         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3520                                         __func__, Tdmbf);
3521                         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3522                         dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3523                                         __func__, Tdmsks);
3524                         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3525                                         __func__, *Tdmdl);
3526 #endif
3527                 } else {
3528                         *NotEnoughTimeForDynamicMetadata = false;
3529                 }
3530         } else {
3531                 *NotEnoughTimeForDynamicMetadata = false;
3532         }
3533
3534         *Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3535                         GPUVMEnable == true ? TWait + Tvm_trips : 0);
3536
3537         if (myPipe->ScalerEnabled)
3538                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3539         else
3540                 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3541
3542         DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3543
3544         DISPCLKCycles = DISPCLKDelaySubtotal;
3545
3546         if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3547                 return true;
3548
3549         *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3550                         myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3551
3552         *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3553                         + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3554                         + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3555                                         myPipe->HActive / 2 : 0)
3556                         + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3557
3558 #ifdef __DML_VBA_DEBUG__
3559         dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3560         dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3561         dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3562         dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3563         dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3564         dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3565         dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3566         dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3567         dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3568 #endif
3569
3570         if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3571                 *DSTYAfterScaler = 1;
3572         else
3573                 *DSTYAfterScaler = 0;
3574
3575         DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3576         *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3577         *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3578 #ifdef __DML_VBA_DEBUG__
3579         dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3580         dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3581 #endif
3582
3583         MyError = false;
3584
3585         Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3586
3587         if (GPUVMEnable == true) {
3588                 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3589                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3590                 if (GPUVMPageTableLevels >= 3) {
3591                         *Tno_bw = UrgentExtraLatency + trip_to_mem *
3592                                         (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3593                 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3594                         Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3595                                         4.0 * LineTime; // VBA_ERROR
3596                         *Tno_bw = UrgentExtraLatency;
3597                 } else {
3598                         *Tno_bw = 0;
3599                 }
3600         } else if (myPipe->DCCEnable == true) {
3601                 Tvm_trips_rounded = LineTime / 4.0;
3602                 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3603                 *Tno_bw = 0;
3604         } else {
3605                 Tvm_trips_rounded = LineTime / 4.0;
3606                 Tr0_trips_rounded = LineTime / 2.0;
3607                 *Tno_bw = 0;
3608         }
3609         Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3610         Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3611
3612         if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3613                         || myPipe->SourcePixelFormat == dm_420_12) {
3614                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3615         } else {
3616                 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3617         }
3618
3619         prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3620                         + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3621         prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3622                         prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3623
3624         min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3625         min_Lsw = dml_max(min_Lsw, 1.0);
3626         Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3627
3628         if (GPUVMEnable == true) {
3629                 Tvm_oto = dml_max3(
3630                                 Tvm_trips,
3631                                 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3632                                 LineTime / 4.0);
3633         } else
3634                 Tvm_oto = LineTime / 4.0;
3635
3636         if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3637                 Tr0_oto = dml_max4(
3638                                 Tr0_trips,
3639                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3640                                 (LineTime - Tvm_oto)/2.0,
3641                                 LineTime / 4.0);
3642 #ifdef __DML_VBA_DEBUG__
3643                 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3644                                 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3645                 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3646                 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3647                 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3648 #endif
3649         } else
3650                 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3651
3652         Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3653         Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3654         dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3655
3656         dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3657                         (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3658
3659 #ifdef __DML_VBA_DEBUG__
3660         dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3661         dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3662         dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3663         dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3664         dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3665         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3666         dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3667         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3668         dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3669         dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3670         dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3671         dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3672         dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3673         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3674         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3675         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3676         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3677         dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3678         dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3679         dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3680         dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3681         dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3682         dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3683         dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3684         dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3685         dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3686         dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3687 #endif
3688
3689         dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3690         Tpre_rounded = dst_y_prefetch_equ * LineTime;
3691 #ifdef __DML_VBA_DEBUG__
3692         dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3693         dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3694         dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3695         dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3696                         __func__, VStartup * LineTime);
3697         dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3698         dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3699         dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3700         dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3701         dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3702         dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3703         dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3704                         __func__, *DSTYAfterScaler);
3705 #endif
3706         dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3707                         MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3708
3709         if (prefetch_sw_bytes < dep_bytes)
3710                 prefetch_sw_bytes = 2 * dep_bytes;
3711
3712         *PrefetchBandwidth = 0;
3713         *DestinationLinesToRequestVMInVBlank = 0;
3714         *DestinationLinesToRequestRowInVBlank = 0;
3715         *VRatioPrefetchY = 0;
3716         *VRatioPrefetchC = 0;
3717         *RequiredPrefetchPixDataBWLuma = 0;
3718         if (dst_y_prefetch_equ > 1) {
3719                 double PrefetchBandwidth1;
3720                 double PrefetchBandwidth2;
3721                 double PrefetchBandwidth3;
3722                 double PrefetchBandwidth4;
3723
3724                 if (Tpre_rounded - *Tno_bw > 0) {
3725                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3726                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3727                                         + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3728                         Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3729                 } else
3730                         PrefetchBandwidth1 = 0;
3731
3732                 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3733                                 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3734                         PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3735                                         + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3736                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3737                 }
3738
3739                 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3740                         PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3741                         (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3742                 else
3743                         PrefetchBandwidth2 = 0;
3744
3745                 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3746                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3747                                         + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3748                         Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3749                 } else
3750                         PrefetchBandwidth3 = 0;
3751
3752
3753                 if (VStartup == MaxVStartup &&
3754                                 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3755                                 LineTime - Tvm_trips_rounded > 0) {
3756                         PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3757                                         / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3758                 }
3759
3760                 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3761                         PrefetchBandwidth4 = prefetch_sw_bytes /
3762                                         (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3763                 } else {
3764                         PrefetchBandwidth4 = 0;
3765                 }
3766
3767 #ifdef __DML_VBA_DEBUG__
3768                 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3769                 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3770                 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3771                 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3772                 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3773                 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3774                 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3775                 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3776                 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3777 #endif
3778                 {
3779                         bool Case1OK;
3780                         bool Case2OK;
3781                         bool Case3OK;
3782
3783                         if (PrefetchBandwidth1 > 0) {
3784                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3785                                                 >= Tvm_trips_rounded
3786                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3787                                                                 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3788                                         Case1OK = true;
3789                                 } else {
3790                                         Case1OK = false;
3791                                 }
3792                         } else {
3793                                 Case1OK = false;
3794                         }
3795
3796                         if (PrefetchBandwidth2 > 0) {
3797                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3798                                                 >= Tvm_trips_rounded
3799                                                 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3800                                                 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3801                                         Case2OK = true;
3802                                 } else {
3803                                         Case2OK = false;
3804                                 }
3805                         } else {
3806                                 Case2OK = false;
3807                         }
3808
3809                         if (PrefetchBandwidth3 > 0) {
3810                                 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3811                                                 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3812                                                                 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3813                                                                 Tr0_trips_rounded) {
3814                                         Case3OK = true;
3815                                 } else {
3816                                         Case3OK = false;
3817                                 }
3818                         } else {
3819                                 Case3OK = false;
3820                         }
3821
3822                         if (Case1OK)
3823                                 prefetch_bw_equ = PrefetchBandwidth1;
3824                         else if (Case2OK)
3825                                 prefetch_bw_equ = PrefetchBandwidth2;
3826                         else if (Case3OK)
3827                                 prefetch_bw_equ = PrefetchBandwidth3;
3828                         else
3829                                 prefetch_bw_equ = PrefetchBandwidth4;
3830
3831 #ifdef __DML_VBA_DEBUG__
3832                         dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3833                         dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3834                         dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3835                         dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3836 #endif
3837
3838                         if (prefetch_bw_equ > 0) {
3839                                 if (GPUVMEnable == true) {
3840                                         Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3841                                                         HostVMInefficiencyFactor / prefetch_bw_equ,
3842                                                         Tvm_trips, LineTime / 4);
3843                                 } else {
3844                                         Tvm_equ = LineTime / 4;
3845                                 }
3846
3847                                 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3848                                         Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3849                                                         HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3850                                                         (LineTime - Tvm_equ) / 2, LineTime / 4);
3851                                 } else {
3852                                         Tr0_equ = (LineTime - Tvm_equ) / 2;
3853                                 }
3854                         } else {
3855                                 Tvm_equ = 0;
3856                                 Tr0_equ = 0;
3857 #ifdef __DML_VBA_DEBUG__
3858                                 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3859 #endif
3860                         }
3861                 }
3862
3863                 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3864                         *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3865                         TimeForFetchingMetaPTE = Tvm_oto;
3866                         TimeForFetchingRowInVBlank = Tr0_oto;
3867                         *PrefetchBandwidth = prefetch_bw_oto;
3868                 } else {
3869                         *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3870                         TimeForFetchingMetaPTE = Tvm_equ;
3871                         TimeForFetchingRowInVBlank = Tr0_equ;
3872                         *PrefetchBandwidth = prefetch_bw_equ;
3873                 }
3874
3875                 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3876
3877                 *DestinationLinesToRequestRowInVBlank =
3878                                 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3879
3880                 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3881                                 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3882
3883 #ifdef __DML_VBA_DEBUG__
3884                 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3885                 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3886                                 __func__, *DestinationLinesToRequestVMInVBlank);
3887                 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3888                 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3889                 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3890                                 __func__, *DestinationLinesToRequestRowInVBlank);
3891                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3892                 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3893 #endif
3894
3895                 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3896                         *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3897                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3898 #ifdef __DML_VBA_DEBUG__
3899                         dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3900                         dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3901                         dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3902 #endif
3903                         if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3904                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3905                                         *VRatioPrefetchY =
3906                                                         dml_max((double) PrefetchSourceLinesY /
3907                                                                         LinesToRequestPrefetchPixelData,
3908                                                                         (double) MaxNumSwathY * SwathHeightY /
3909                                                                         (LinesToRequestPrefetchPixelData -
3910                                                                         (VInitPreFillY - 3.0) / 2.0));
3911                                         *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3912                                 } else {
3913                                         MyError = true;
3914                                         *VRatioPrefetchY = 0;
3915                                 }
3916 #ifdef __DML_VBA_DEBUG__
3917                                 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3918                                 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3919                                 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3920 #endif
3921                         }
3922
3923                         *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3924                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3925
3926 #ifdef __DML_VBA_DEBUG__
3927                         dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3928                         dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3929                         dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3930 #endif
3931                         if ((SwathHeightC > 4)) {
3932                                 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3933                                         *VRatioPrefetchC =
3934                                                 dml_max(*VRatioPrefetchC,
3935                                                         (double) MaxNumSwathC * SwathHeightC /
3936                                                         (LinesToRequestPrefetchPixelData -
3937                                                         (VInitPreFillC - 3.0) / 2.0));
3938                                         *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3939                                 } else {
3940                                         MyError = true;
3941                                         *VRatioPrefetchC = 0;
3942                                 }
3943 #ifdef __DML_VBA_DEBUG__
3944                                 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3945                                 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3946                                 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3947 #endif
3948                         }
3949
3950                         *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3951                                         / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3952                                         / LineTime;
3953
3954 #ifdef __DML_VBA_DEBUG__
3955                         dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3956                         dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3957                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3958                         dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3959                                         __func__, *RequiredPrefetchPixDataBWLuma);
3960 #endif
3961                         *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3962                                         LinesToRequestPrefetchPixelData
3963                                         * myPipe->BytePerPixelC
3964                                         * swath_width_chroma_ub / LineTime;
3965                 } else {
3966                         MyError = true;
3967 #ifdef __DML_VBA_DEBUG__
3968                         dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3969                                         __func__, LinesToRequestPrefetchPixelData);
3970 #endif
3971                         *VRatioPrefetchY = 0;
3972                         *VRatioPrefetchC = 0;
3973                         *RequiredPrefetchPixDataBWLuma = 0;
3974                         *RequiredPrefetchPixDataBWChroma = 0;
3975                 }
3976 #ifdef __DML_VBA_DEBUG__
3977                 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3978                         (double)LinesToRequestPrefetchPixelData * LineTime +
3979                         2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3980                 dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3981                 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3982                         (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3983                 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3984                 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3985                         TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3986                         ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3987                 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3988                                 PixelPTEBytesPerRow);
3989 #endif
3990         } else {
3991                 MyError = true;
3992 #ifdef __DML_VBA_DEBUG__
3993                 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3994                                 __func__, dst_y_prefetch_equ);
3995 #endif
3996         }
3997
3998         {
3999                 double prefetch_vm_bw;
4000                 double prefetch_row_bw;
4001
4002                 if (PDEAndMetaPTEBytesFrame == 0) {
4003                         prefetch_vm_bw = 0;
4004                 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
4005 #ifdef __DML_VBA_DEBUG__
4006                         dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4007                         dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4008                         dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4009                                         __func__, *DestinationLinesToRequestVMInVBlank);
4010                         dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4011 #endif
4012                         prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4013                                         (*DestinationLinesToRequestVMInVBlank * LineTime);
4014 #ifdef __DML_VBA_DEBUG__
4015                         dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4016 #endif
4017                 } else {
4018                         prefetch_vm_bw = 0;
4019                         MyError = true;
4020 #ifdef __DML_VBA_DEBUG__
4021                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4022                                         __func__, *DestinationLinesToRequestVMInVBlank);
4023 #endif
4024                 }
4025
4026                 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4027                         prefetch_row_bw = 0;
4028                 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4029                         prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4030                                         (*DestinationLinesToRequestRowInVBlank * LineTime);
4031
4032 #ifdef __DML_VBA_DEBUG__
4033                         dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4034                         dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4035                         dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4036                                         __func__, *DestinationLinesToRequestRowInVBlank);
4037                         dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4038 #endif
4039                 } else {
4040                         prefetch_row_bw = 0;
4041                         MyError = true;
4042 #ifdef __DML_VBA_DEBUG__
4043                         dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4044                                         __func__, *DestinationLinesToRequestRowInVBlank);
4045 #endif
4046                 }
4047
4048                 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4049         }
4050
4051         if (MyError) {
4052                 *PrefetchBandwidth = 0;
4053                 TimeForFetchingMetaPTE = 0;
4054                 TimeForFetchingRowInVBlank = 0;
4055                 *DestinationLinesToRequestVMInVBlank = 0;
4056                 *DestinationLinesToRequestRowInVBlank = 0;
4057                 *DestinationLinesForPrefetch = 0;
4058                 LinesToRequestPrefetchPixelData = 0;
4059                 *VRatioPrefetchY = 0;
4060                 *VRatioPrefetchC = 0;
4061                 *RequiredPrefetchPixDataBWLuma = 0;
4062                 *RequiredPrefetchPixDataBWChroma = 0;
4063         }
4064
4065         return MyError;
4066 } // CalculatePrefetchSchedule
4067
4068 void dml32_CalculateFlipSchedule(
4069                 double HostVMInefficiencyFactor,
4070                 double UrgentExtraLatency,
4071                 double UrgentLatency,
4072                 unsigned int GPUVMMaxPageTableLevels,
4073                 bool HostVMEnable,
4074                 unsigned int HostVMMaxNonCachedPageTableLevels,
4075                 bool GPUVMEnable,
4076                 double HostVMMinPageSize,
4077                 double PDEAndMetaPTEBytesPerFrame,
4078                 double MetaRowBytes,
4079                 double DPTEBytesPerRow,
4080                 double BandwidthAvailableForImmediateFlip,
4081                 unsigned int TotImmediateFlipBytes,
4082                 enum source_format_class SourcePixelFormat,
4083                 double LineTime,
4084                 double VRatio,
4085                 double VRatioChroma,
4086                 double Tno_bw,
4087                 bool DCCEnable,
4088                 unsigned int dpte_row_height,
4089                 unsigned int meta_row_height,
4090                 unsigned int dpte_row_height_chroma,
4091                 unsigned int meta_row_height_chroma,
4092                 bool    use_one_row_for_frame_flip,
4093
4094                 /* Output */
4095                 double *DestinationLinesToRequestVMInImmediateFlip,
4096                 double *DestinationLinesToRequestRowInImmediateFlip,
4097                 double *final_flip_bw,
4098                 bool *ImmediateFlipSupportedForPipe)
4099 {
4100         double min_row_time = 0.0;
4101         unsigned int HostVMDynamicLevelsTrips;
4102         double TimeForFetchingMetaPTEImmediateFlip;
4103         double TimeForFetchingRowInVBlankImmediateFlip;
4104         double ImmediateFlipBW;
4105
4106         if (GPUVMEnable == true && HostVMEnable == true)
4107                 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4108         else
4109                 HostVMDynamicLevelsTrips = 0;
4110
4111 #ifdef __DML_VBA_DEBUG__
4112         dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4113         dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4114 #endif
4115
4116         if (TotImmediateFlipBytes > 0) {
4117                 if (use_one_row_for_frame_flip) {
4118                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4119                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4120                 } else {
4121                         ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4122                                         BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4123                 }
4124                 if (GPUVMEnable == true) {
4125                         TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4126                                         HostVMInefficiencyFactor / ImmediateFlipBW,
4127                                         UrgentExtraLatency + UrgentLatency *
4128                                         (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4129                                         LineTime / 4.0);
4130                 } else {
4131                         TimeForFetchingMetaPTEImmediateFlip = 0;
4132                 }
4133                 if ((GPUVMEnable == true || DCCEnable == true)) {
4134                         TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4135                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4136                                         UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4137                 } else {
4138                         TimeForFetchingRowInVBlankImmediateFlip = 0;
4139                 }
4140
4141                 *DestinationLinesToRequestVMInImmediateFlip =
4142                                 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4143                 *DestinationLinesToRequestRowInImmediateFlip =
4144                                 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4145
4146                 if (GPUVMEnable == true) {
4147                         *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4148                                         (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4149                                         (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4150                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4151                 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4152                         *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4153                                         (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4154                 } else {
4155                         *final_flip_bw = 0;
4156                 }
4157         } else {
4158                 TimeForFetchingMetaPTEImmediateFlip = 0;
4159                 TimeForFetchingRowInVBlankImmediateFlip = 0;
4160                 *DestinationLinesToRequestVMInImmediateFlip = 0;
4161                 *DestinationLinesToRequestRowInImmediateFlip = 0;
4162                 *final_flip_bw = 0;
4163         }
4164
4165         if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4166                 if (GPUVMEnable == true && DCCEnable != true) {
4167                         min_row_time = dml_min(dpte_row_height *
4168                                         LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4169                 } else if (GPUVMEnable != true && DCCEnable == true) {
4170                         min_row_time = dml_min(meta_row_height *
4171                                         LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4172                 } else {
4173                         min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4174                                         LineTime / VRatio, dpte_row_height_chroma * LineTime /
4175                                         VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4176                 }
4177         } else {
4178                 if (GPUVMEnable == true && DCCEnable != true) {
4179                         min_row_time = dpte_row_height * LineTime / VRatio;
4180                 } else if (GPUVMEnable != true && DCCEnable == true) {
4181                         min_row_time = meta_row_height * LineTime / VRatio;
4182                 } else {
4183                         min_row_time =
4184                                 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4185                 }
4186         }
4187
4188         if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4189                         || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4190                                         > min_row_time) {
4191                 *ImmediateFlipSupportedForPipe = false;
4192         } else {
4193                 *ImmediateFlipSupportedForPipe = true;
4194         }
4195
4196 #ifdef __DML_VBA_DEBUG__
4197         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4198         dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4199         dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4200                         __func__, *DestinationLinesToRequestVMInImmediateFlip);
4201         dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4202                         __func__, *DestinationLinesToRequestRowInImmediateFlip);
4203         dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4204         dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4205                         __func__, TimeForFetchingRowInVBlankImmediateFlip);
4206         dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4207         dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4208 #endif
4209 } // CalculateFlipSchedule
4210
4211 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4212                 bool USRRetrainingRequiredFinal,
4213                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4214                 unsigned int PrefetchMode,
4215                 unsigned int NumberOfActiveSurfaces,
4216                 unsigned int MaxLineBufferLines,
4217                 unsigned int LineBufferSize,
4218                 unsigned int WritebackInterfaceBufferSize,
4219                 double DCFCLK,
4220                 double ReturnBW,
4221                 bool SynchronizeTimingsFinal,
4222                 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4223                 bool DRRDisplay[],
4224                 unsigned int dpte_group_bytes[],
4225                 unsigned int meta_row_height[],
4226                 unsigned int meta_row_height_chroma[],
4227                 SOCParametersList mmSOCParameters,
4228                 unsigned int WritebackChunkSize,
4229                 double SOCCLK,
4230                 double DCFClkDeepSleep,
4231                 unsigned int DETBufferSizeY[],
4232                 unsigned int DETBufferSizeC[],
4233                 unsigned int SwathHeightY[],
4234                 unsigned int SwathHeightC[],
4235                 unsigned int LBBitPerPixel[],
4236                 double SwathWidthY[],
4237                 double SwathWidthC[],
4238                 double HRatio[],
4239                 double HRatioChroma[],
4240                 unsigned int VTaps[],
4241                 unsigned int VTapsChroma[],
4242                 double VRatio[],
4243                 double VRatioChroma[],
4244                 unsigned int HTotal[],
4245                 unsigned int VTotal[],
4246                 unsigned int VActive[],
4247                 double PixelClock[],
4248                 unsigned int BlendingAndTiming[],
4249                 unsigned int DPPPerSurface[],
4250                 double BytePerPixelDETY[],
4251                 double BytePerPixelDETC[],
4252                 double DSTXAfterScaler[],
4253                 double DSTYAfterScaler[],
4254                 bool WritebackEnable[],
4255                 enum source_format_class WritebackPixelFormat[],
4256                 double WritebackDestinationWidth[],
4257                 double WritebackDestinationHeight[],
4258                 double WritebackSourceHeight[],
4259                 bool UnboundedRequestEnabled,
4260                 unsigned int CompressedBufferSizeInkByte,
4261
4262                 /* Output */
4263                 Watermarks *Watermark,
4264                 enum clock_change_support *DRAMClockChangeSupport,
4265                 double MaxActiveDRAMClockChangeLatencySupported[],
4266                 unsigned int SubViewportLinesNeededInMALL[],
4267                 enum dm_fclock_change_support *FCLKChangeSupport,
4268                 double *MinActiveFCLKChangeLatencySupported,
4269                 bool *USRRetrainingSupport,
4270                 double ActiveDRAMClockChangeLatencyMargin[])
4271 {
4272         unsigned int i, j, k;
4273         unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4274         unsigned int DRAMClockChangeSupportNumber = 0;
4275         unsigned int LastSurfaceWithoutMargin;
4276         unsigned int DRAMClockChangeMethod = 0;
4277         bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4278         double MinActiveFCLKChangeMargin = 0.;
4279         double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4280         double ActiveClockChangeLatencyHidingY;
4281         double ActiveClockChangeLatencyHidingC;
4282         double ActiveClockChangeLatencyHiding;
4283         double EffectiveDETBufferSizeY;
4284         double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4285         double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4286         double TotalPixelBW = 0.0;
4287         bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4288         double     EffectiveLBLatencyHidingY;
4289         double     EffectiveLBLatencyHidingC;
4290         double     LinesInDETY[DC__NUM_DPP__MAX];
4291         double     LinesInDETC[DC__NUM_DPP__MAX];
4292         unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4293         unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4294         double     FullDETBufferingTimeY;
4295         double     FullDETBufferingTimeC;
4296         double     WritebackDRAMClockChangeLatencyMargin;
4297         double     WritebackFCLKChangeLatencyMargin;
4298         double     WritebackLatencyHiding;
4299         bool    SameTimingForFCLKChange;
4300
4301         unsigned int    TotalActiveWriteback = 0;
4302         unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4303         unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4304
4305         Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4306         Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4307                         + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4308         Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4309         Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4310         Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4311                         + 10 / DCFClkDeepSleep;
4312         Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4313                         + 10 / DCFClkDeepSleep;
4314         Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4315                         + 10 / DCFClkDeepSleep;
4316         Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4317                         + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4318
4319 #ifdef __DML_VBA_DEBUG__
4320         dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4321         dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4322         dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4323         dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4324         dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4325         dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4326         dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4327         dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4328         dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4329         dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4330         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4331                         __func__, Watermark->Z8StutterEnterPlusExitWatermark);
4332 #endif
4333
4334
4335         TotalActiveWriteback = 0;
4336         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4337                 if (WritebackEnable[k] == true)
4338                         TotalActiveWriteback = TotalActiveWriteback + 1;
4339         }
4340
4341         if (TotalActiveWriteback <= 1) {
4342                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4343         } else {
4344                 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4345                                 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4346         }
4347         if (USRRetrainingRequiredFinal)
4348                 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4349                                 + mmSOCParameters.USRRetrainingLatency;
4350
4351         if (TotalActiveWriteback <= 1) {
4352                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4353                                 + mmSOCParameters.WritebackLatency;
4354                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4355                                 + mmSOCParameters.WritebackLatency;
4356         } else {
4357                 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4358                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4359                 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4360                                 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4361         }
4362
4363         if (USRRetrainingRequiredFinal)
4364                 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4365                                 + mmSOCParameters.USRRetrainingLatency;
4366
4367         if (USRRetrainingRequiredFinal)
4368                 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4369                                 + mmSOCParameters.USRRetrainingLatency;
4370
4371 #ifdef __DML_VBA_DEBUG__
4372         dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4373                         __func__, Watermark->WritebackDRAMClockChangeWatermark);
4374         dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4375         dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4376         dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4377         dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4378 #endif
4379
4380         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4381                 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4382                                 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4383         }
4384
4385         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4386
4387                 LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4388                 LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4389
4390
4391 #ifdef __DML_VBA_DEBUG__
4392                 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4393                 dml_print("DML::%s: k=%d, LineBufferSize     = %d\n", __func__, k, LineBufferSize);
4394                 dml_print("DML::%s: k=%d, LBBitPerPixel      = %d\n", __func__, k, LBBitPerPixel[k]);
4395                 dml_print("DML::%s: k=%d, HRatio             = %f\n", __func__, k, HRatio[k]);
4396                 dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
4397 #endif
4398
4399                 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4400                 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4401                 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4402
4403                 if (UnboundedRequestEnabled) {
4404                         EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4405                                         + CompressedBufferSizeInkByte * 1024
4406                                                         * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4407                                                         / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
4408                 }
4409
4410                 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4411                 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4412                 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4413
4414                 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4415                                 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4416
4417                 if (NumberOfActiveSurfaces > 1) {
4418                         ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4419                                         - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4420                                                         / PixelClock[k] / VRatio[k];
4421                 }
4422
4423                 if (BytePerPixelDETC[k] > 0) {
4424                         LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4425                         LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4426                         FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4427                                         / VRatioChroma[k];
4428                         ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4429                                         - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4430                                                         / PixelClock[k];
4431                         if (NumberOfActiveSurfaces > 1) {
4432                                 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4433                                                 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4434                                                                 / PixelClock[k] / VRatioChroma[k];
4435                         }
4436                         ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4437                                         ActiveClockChangeLatencyHidingC);
4438                 } else {
4439                         ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4440                 }
4441
4442                 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4443                                 - Watermark->DRAMClockChangeWatermark;
4444                 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4445                                 - Watermark->FCLKChangeWatermark;
4446                 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4447
4448                 if (WritebackEnable[k]) {
4449                         WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4450                                         / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4451                                                         / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4452                         if (WritebackPixelFormat[k] == dm_444_64)
4453                                 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4454
4455                         WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4456                                         - Watermark->WritebackDRAMClockChangeWatermark;
4457
4458                         WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4459                                         - Watermark->WritebackFCLKChangeWatermark;
4460
4461                         ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4462                                         WritebackFCLKChangeLatencyMargin);
4463                         ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4464                                         WritebackDRAMClockChangeLatencyMargin);
4465                 }
4466                 MaxActiveDRAMClockChangeLatencySupported[k] =
4467                                 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4468                                                 0 :
4469                                                 (ActiveDRAMClockChangeLatencyMargin[k]
4470                                                                 + mmSOCParameters.DRAMClockChangeLatency);
4471         }
4472
4473         for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4474                 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4475                         if (i == j ||
4476                                         (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4477                                         (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4478                                         (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4479                                         (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4480                                         HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4481                                         VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4482                                         (DRRDisplay[i] || DRRDisplay[j]))) {
4483                                 SynchronizedSurfaces[i][j] = true;
4484                         } else {
4485                                 SynchronizedSurfaces[i][j] = false;
4486                         }
4487                 }
4488         }
4489
4490         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4491                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4492                                 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4493                                 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4494                         FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4495                         MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4496                         SurfaceWithMinActiveFCLKChangeMargin = k;
4497                 }
4498         }
4499
4500         *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4501
4502         SameTimingForFCLKChange = true;
4503         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4504                 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4505                         if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4506                                         (SameTimingForFCLKChange ||
4507                                         ActiveFCLKChangeLatencyMargin[k] <
4508                                         SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4509                                 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4510                         }
4511                         SameTimingForFCLKChange = false;
4512                 }
4513         }
4514
4515         if (MinActiveFCLKChangeMargin > 0) {
4516                 *FCLKChangeSupport = dm_fclock_change_vactive;
4517         } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4518                         (PrefetchMode <= 1)) {
4519                 *FCLKChangeSupport = dm_fclock_change_vblank;
4520         } else {
4521                 *FCLKChangeSupport = dm_fclock_change_unsupported;
4522         }
4523
4524         *USRRetrainingSupport = true;
4525         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4526                 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4527                                 (USRRetrainingLatencyMargin[k] < 0)) {
4528                         *USRRetrainingSupport = false;
4529                 }
4530         }
4531
4532         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4533                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4534                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4535                                 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4536                                 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4537                         if (PrefetchMode > 0) {
4538                                 DRAMClockChangeSupportNumber = 2;
4539                         } else if (DRAMClockChangeSupportNumber == 0) {
4540                                 DRAMClockChangeSupportNumber = 1;
4541                                 LastSurfaceWithoutMargin = k;
4542                         } else if (DRAMClockChangeSupportNumber == 1 &&
4543                                         !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4544                                 DRAMClockChangeSupportNumber = 2;
4545                         }
4546                 }
4547         }
4548
4549         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4550                 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4551                         DRAMClockChangeMethod = 1;
4552                 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4553                         DRAMClockChangeMethod = 2;
4554         }
4555
4556         if (DRAMClockChangeMethod == 0) {
4557                 if (DRAMClockChangeSupportNumber == 0)
4558                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4559                 else if (DRAMClockChangeSupportNumber == 1)
4560                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4561                 else
4562                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4563         } else if (DRAMClockChangeMethod == 1) {
4564                 if (DRAMClockChangeSupportNumber == 0)
4565                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4566                 else if (DRAMClockChangeSupportNumber == 1)
4567                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4568                 else
4569                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4570         } else {
4571                 if (DRAMClockChangeSupportNumber == 0)
4572                         *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4573                 else if (DRAMClockChangeSupportNumber == 1)
4574                         *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4575                 else
4576                         *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4577         }
4578
4579         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4580                 unsigned int dst_y_pstate;
4581                 unsigned int src_y_pstate_l;
4582                 unsigned int src_y_pstate_c;
4583                 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4584
4585                 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4586                 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4587                 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4588                 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4589
4590 #ifdef __DML_VBA_DEBUG__
4591 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4592 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4593 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4594 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4595 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4596 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4597 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4598 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4599 dml_print("DML::%s: k=%d, meta_row_height   = %d\n", __func__, k, meta_row_height[k]);
4600 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4601 #endif
4602                 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4603
4604                 if (BytePerPixelDETC[k] > 0) {
4605                         src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4606                         src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4607                         sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4608                         SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4609
4610 #ifdef __DML_VBA_DEBUG__
4611 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4612 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4613 dml_print("DML::%s: k=%d, meta_row_height_chroma    = %d\n", __func__, k, meta_row_height_chroma[k]);
4614 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4615 #endif
4616                 }
4617         }
4618 #ifdef __DML_VBA_DEBUG__
4619         dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4620         dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4621         dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4622                         __func__, *MinActiveFCLKChangeLatencySupported);
4623         dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4624 #endif
4625 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4626
4627 double dml32_CalculateWriteBackDISPCLK(
4628                 enum source_format_class WritebackPixelFormat,
4629                 double PixelClock,
4630                 double WritebackHRatio,
4631                 double WritebackVRatio,
4632                 unsigned int WritebackHTaps,
4633                 unsigned int WritebackVTaps,
4634                 unsigned int   WritebackSourceWidth,
4635                 unsigned int   WritebackDestinationWidth,
4636                 unsigned int HTotal,
4637                 unsigned int WritebackLineBufferSize,
4638                 double DISPCLKDPPCLKVCOSpeed)
4639 {
4640         double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4641
4642         DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4643         DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4644         DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4645                         WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4646         return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4647 }
4648
4649 void dml32_CalculateMinAndMaxPrefetchMode(
4650                 enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4651                 unsigned int             *MinPrefetchMode,
4652                 unsigned int             *MaxPrefetchMode)
4653 {
4654         if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4655                 *MinPrefetchMode = 3;
4656                 *MaxPrefetchMode = 3;
4657         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4658                 *MinPrefetchMode = 2;
4659                 *MaxPrefetchMode = 2;
4660         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4661                 *MinPrefetchMode = 1;
4662                 *MaxPrefetchMode = 1;
4663         } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4664                 *MinPrefetchMode = 0;
4665                 *MaxPrefetchMode = 0;
4666         } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4667                         dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4668                 *MinPrefetchMode = 0;
4669                 *MaxPrefetchMode = 3;
4670         } else {
4671                 *MinPrefetchMode = 0;
4672                 *MaxPrefetchMode = 3;
4673         }
4674 } // CalculateMinAndMaxPrefetchMode
4675
4676 void dml32_CalculatePixelDeliveryTimes(
4677                 unsigned int             NumberOfActiveSurfaces,
4678                 double              VRatio[],
4679                 double              VRatioChroma[],
4680                 double              VRatioPrefetchY[],
4681                 double              VRatioPrefetchC[],
4682                 unsigned int             swath_width_luma_ub[],
4683                 unsigned int             swath_width_chroma_ub[],
4684                 unsigned int             DPPPerSurface[],
4685                 double              HRatio[],
4686                 double              HRatioChroma[],
4687                 double              PixelClock[],
4688                 double              PSCL_THROUGHPUT[],
4689                 double              PSCL_THROUGHPUT_CHROMA[],
4690                 double              Dppclk[],
4691                 unsigned int             BytePerPixelC[],
4692                 enum dm_rotation_angle   SourceRotation[],
4693                 unsigned int             NumberOfCursors[],
4694                 unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4695                 unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4696                 unsigned int             BlockWidth256BytesY[],
4697                 unsigned int             BlockHeight256BytesY[],
4698                 unsigned int             BlockWidth256BytesC[],
4699                 unsigned int             BlockHeight256BytesC[],
4700
4701                 /* Output */
4702                 double              DisplayPipeLineDeliveryTimeLuma[],
4703                 double              DisplayPipeLineDeliveryTimeChroma[],
4704                 double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4705                 double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4706                 double              DisplayPipeRequestDeliveryTimeLuma[],
4707                 double              DisplayPipeRequestDeliveryTimeChroma[],
4708                 double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4709                 double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4710                 double              CursorRequestDeliveryTime[],
4711                 double              CursorRequestDeliveryTimePrefetch[])
4712 {
4713         double   req_per_swath_ub;
4714         unsigned int k;
4715
4716         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4717
4718 #ifdef __DML_VBA_DEBUG__
4719                 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4720                 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4721                 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4722                 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4723                 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4724                 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4725                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4726                 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4727                 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4728                 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4729                 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4730 #endif
4731
4732                 if (VRatio[k] <= 1) {
4733                         DisplayPipeLineDeliveryTimeLuma[k] =
4734                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4735                 } else {
4736                         DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4737                 }
4738
4739                 if (BytePerPixelC[k] == 0) {
4740                         DisplayPipeLineDeliveryTimeChroma[k] = 0;
4741                 } else {
4742                         if (VRatioChroma[k] <= 1) {
4743                                 DisplayPipeLineDeliveryTimeChroma[k] =
4744                                         swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4745                         } else {
4746                                 DisplayPipeLineDeliveryTimeChroma[k] =
4747                                         swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4748                         }
4749                 }
4750
4751                 if (VRatioPrefetchY[k] <= 1) {
4752                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4753                                         swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4754                 } else {
4755                         DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4756                                         swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4757                 }
4758
4759                 if (BytePerPixelC[k] == 0) {
4760                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4761                 } else {
4762                         if (VRatioPrefetchC[k] <= 1) {
4763                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4764                                                 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4765                         } else {
4766                                 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4767                                                 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4768                         }
4769                 }
4770 #ifdef __DML_VBA_DEBUG__
4771                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4772                                 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4773                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4774                                 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4775                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4776                                 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4777                 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4778                                 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4779 #endif
4780         }
4781
4782         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4783                 if (!IsVertical(SourceRotation[k]))
4784                         req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4785                 else
4786                         req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4787 #ifdef __DML_VBA_DEBUG__
4788                 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4789 #endif
4790
4791                 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4792                 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4793                                 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4794                 if (BytePerPixelC[k] == 0) {
4795                         DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4796                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4797                 } else {
4798                         if (!IsVertical(SourceRotation[k]))
4799                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4800                         else
4801                                 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4802 #ifdef __DML_VBA_DEBUG__
4803                         dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4804 #endif
4805                         DisplayPipeRequestDeliveryTimeChroma[k] =
4806                                         DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4807                         DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4808                                         DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4809                 }
4810 #ifdef __DML_VBA_DEBUG__
4811                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4812                                 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4813                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4814                                 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4815                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4816                                 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4817                 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4818                                 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4819 #endif
4820         }
4821
4822         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4823                 unsigned int cursor_req_per_width;
4824
4825                 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4826                                 256.0 / 8.0, 1.0);
4827                 if (NumberOfCursors[k] > 0) {
4828                         if (VRatio[k] <= 1) {
4829                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4830                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4831                         } else {
4832                                 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4833                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4834                         }
4835                         if (VRatioPrefetchY[k] <= 1) {
4836                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4837                                                 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4838                         } else {
4839                                 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4840                                                 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4841                         }
4842                 } else {
4843                         CursorRequestDeliveryTime[k] = 0;
4844                         CursorRequestDeliveryTimePrefetch[k] = 0;
4845                 }
4846 #ifdef __DML_VBA_DEBUG__
4847                 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4848                                 __func__, k, NumberOfCursors[k]);
4849                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4850                                 __func__, k, CursorRequestDeliveryTime[k]);
4851                 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4852                                 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4853 #endif
4854         }
4855 } // CalculatePixelDeliveryTimes
4856
4857 void dml32_CalculateMetaAndPTETimes(
4858                 bool use_one_row_for_frame[],
4859                 unsigned int NumberOfActiveSurfaces,
4860                 bool GPUVMEnable,
4861                 unsigned int MetaChunkSize,
4862                 unsigned int MinMetaChunkSizeBytes,
4863                 unsigned int    HTotal[],
4864                 double  VRatio[],
4865                 double  VRatioChroma[],
4866                 double  DestinationLinesToRequestRowInVBlank[],
4867                 double  DestinationLinesToRequestRowInImmediateFlip[],
4868                 bool DCCEnable[],
4869                 double  PixelClock[],
4870                 unsigned int BytePerPixelY[],
4871                 unsigned int BytePerPixelC[],
4872                 enum dm_rotation_angle SourceRotation[],
4873                 unsigned int dpte_row_height[],
4874                 unsigned int dpte_row_height_chroma[],
4875                 unsigned int meta_row_width[],
4876                 unsigned int meta_row_width_chroma[],
4877                 unsigned int meta_row_height[],
4878                 unsigned int meta_row_height_chroma[],
4879                 unsigned int meta_req_width[],
4880                 unsigned int meta_req_width_chroma[],
4881                 unsigned int meta_req_height[],
4882                 unsigned int meta_req_height_chroma[],
4883                 unsigned int dpte_group_bytes[],
4884                 unsigned int    PTERequestSizeY[],
4885                 unsigned int    PTERequestSizeC[],
4886                 unsigned int    PixelPTEReqWidthY[],
4887                 unsigned int    PixelPTEReqHeightY[],
4888                 unsigned int    PixelPTEReqWidthC[],
4889                 unsigned int    PixelPTEReqHeightC[],
4890                 unsigned int    dpte_row_width_luma_ub[],
4891                 unsigned int    dpte_row_width_chroma_ub[],
4892
4893                 /* Output */
4894                 double DST_Y_PER_PTE_ROW_NOM_L[],
4895                 double DST_Y_PER_PTE_ROW_NOM_C[],
4896                 double DST_Y_PER_META_ROW_NOM_L[],
4897                 double DST_Y_PER_META_ROW_NOM_C[],
4898                 double TimePerMetaChunkNominal[],
4899                 double TimePerChromaMetaChunkNominal[],
4900                 double TimePerMetaChunkVBlank[],
4901                 double TimePerChromaMetaChunkVBlank[],
4902                 double TimePerMetaChunkFlip[],
4903                 double TimePerChromaMetaChunkFlip[],
4904                 double time_per_pte_group_nom_luma[],
4905                 double time_per_pte_group_vblank_luma[],
4906                 double time_per_pte_group_flip_luma[],
4907                 double time_per_pte_group_nom_chroma[],
4908                 double time_per_pte_group_vblank_chroma[],
4909                 double time_per_pte_group_flip_chroma[])
4910 {
4911         unsigned int   meta_chunk_width;
4912         unsigned int   min_meta_chunk_width;
4913         unsigned int   meta_chunk_per_row_int;
4914         unsigned int   meta_row_remainder;
4915         unsigned int   meta_chunk_threshold;
4916         unsigned int   meta_chunks_per_row_ub;
4917         unsigned int   meta_chunk_width_chroma;
4918         unsigned int   min_meta_chunk_width_chroma;
4919         unsigned int   meta_chunk_per_row_int_chroma;
4920         unsigned int   meta_row_remainder_chroma;
4921         unsigned int   meta_chunk_threshold_chroma;
4922         unsigned int   meta_chunks_per_row_ub_chroma;
4923         unsigned int   dpte_group_width_luma;
4924         unsigned int   dpte_groups_per_row_luma_ub;
4925         unsigned int   dpte_group_width_chroma;
4926         unsigned int   dpte_groups_per_row_chroma_ub;
4927         unsigned int k;
4928
4929         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4930                 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4931                 if (BytePerPixelC[k] == 0)
4932                         DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4933                 else
4934                         DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4935                 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4936                 if (BytePerPixelC[k] == 0)
4937                         DST_Y_PER_META_ROW_NOM_C[k] = 0;
4938                 else
4939                         DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4940         }
4941
4942         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4943                 if (DCCEnable[k] == true) {
4944                         meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4945                         min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4946                         meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4947                         meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4948                         if (!IsVertical(SourceRotation[k]))
4949                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4950                         else
4951                                 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4952
4953                         if (meta_row_remainder <= meta_chunk_threshold)
4954                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4955                         else
4956                                 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4957
4958                         TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4959                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4960                         TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4961                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4962                         TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4963                                         HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4964                         if (BytePerPixelC[k] == 0) {
4965                                 TimePerChromaMetaChunkNominal[k] = 0;
4966                                 TimePerChromaMetaChunkVBlank[k] = 0;
4967                                 TimePerChromaMetaChunkFlip[k] = 0;
4968                         } else {
4969                                 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4970                                                 meta_row_height_chroma[k];
4971                                 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4972                                                 meta_row_height_chroma[k];
4973                                 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4974                                                 meta_chunk_width_chroma;
4975                                 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4976                                 if (!IsVertical(SourceRotation[k])) {
4977                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4978                                                         meta_req_width_chroma[k];
4979                                 } else {
4980                                         meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4981                                                         meta_req_height_chroma[k];
4982                                 }
4983                                 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4984                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4985                                 else
4986                                         meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4987
4988                                 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4989                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4990                                 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4991                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4992                                 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4993                                                 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4994                         }
4995                 } else {
4996                         TimePerMetaChunkNominal[k] = 0;
4997                         TimePerMetaChunkVBlank[k] = 0;
4998                         TimePerMetaChunkFlip[k] = 0;
4999                         TimePerChromaMetaChunkNominal[k] = 0;
5000                         TimePerChromaMetaChunkVBlank[k] = 0;
5001                         TimePerChromaMetaChunkFlip[k] = 0;
5002                 }
5003         }
5004
5005         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5006                 if (GPUVMEnable == true) {
5007                         if (!IsVertical(SourceRotation[k])) {
5008                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5009                                                 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5010                         } else {
5011                                 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5012                                                 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5013                         }
5014
5015                         if (use_one_row_for_frame[k]) {
5016                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5017                                                 (double) dpte_group_width_luma / 2.0, 1.0);
5018                         } else {
5019                                 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5020                                                 (double) dpte_group_width_luma, 1.0);
5021                         }
5022 #ifdef __DML_VBA_DEBUG__
5023                         dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5024                                         __func__, k, use_one_row_for_frame[k]);
5025                         dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5026                                         __func__, k, dpte_group_bytes[k]);
5027                         dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5028                                         __func__, k, PTERequestSizeY[k]);
5029                         dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5030                                         __func__, k, PixelPTEReqWidthY[k]);
5031                         dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5032                                         __func__, k, PixelPTEReqHeightY[k]);
5033                         dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5034                                         __func__, k, dpte_row_width_luma_ub[k]);
5035                         dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5036                                         __func__, k, dpte_group_width_luma);
5037                         dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5038                                         __func__, k, dpte_groups_per_row_luma_ub);
5039 #endif
5040
5041                         time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5042                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5043                         time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5044                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5045                         time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5046                                         HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5047                         if (BytePerPixelC[k] == 0) {
5048                                 time_per_pte_group_nom_chroma[k] = 0;
5049                                 time_per_pte_group_vblank_chroma[k] = 0;
5050                                 time_per_pte_group_flip_chroma[k] = 0;
5051                         } else {
5052                                 if (!IsVertical(SourceRotation[k])) {
5053                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5054                                                         (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5055                                 } else {
5056                                         dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5057                                                         (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5058                                 }
5059
5060                                 if (use_one_row_for_frame[k]) {
5061                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5062                                                         (double) dpte_group_width_chroma / 2.0, 1.0);
5063                                 } else {
5064                                         dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5065                                                         (double) dpte_group_width_chroma, 1.0);
5066                                 }
5067 #ifdef __DML_VBA_DEBUG__
5068                                 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5069                                                 __func__, k, dpte_row_width_chroma_ub[k]);
5070                                 dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5071                                                 __func__, k, dpte_group_width_chroma);
5072                                 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5073                                                 __func__, k, dpte_groups_per_row_chroma_ub);
5074 #endif
5075                                 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5076                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5077                                 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5078                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5079                                 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5080                                                 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5081                         }
5082                 } else {
5083                         time_per_pte_group_nom_luma[k] = 0;
5084                         time_per_pte_group_vblank_luma[k] = 0;
5085                         time_per_pte_group_flip_luma[k] = 0;
5086                         time_per_pte_group_nom_chroma[k] = 0;
5087                         time_per_pte_group_vblank_chroma[k] = 0;
5088                         time_per_pte_group_flip_chroma[k] = 0;
5089                 }
5090 #ifdef __DML_VBA_DEBUG__
5091                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5092                                 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5093                 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5094                                 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5095                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5096                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5097                 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5098                                 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5099                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5100                                 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5101                 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5102                                 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5103                 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5104                                 __func__, k, TimePerMetaChunkNominal[k]);
5105                 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5106                                 __func__, k, TimePerMetaChunkVBlank[k]);
5107                 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5108                                 __func__, k, TimePerMetaChunkFlip[k]);
5109                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5110                                 __func__, k, TimePerChromaMetaChunkNominal[k]);
5111                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5112                                 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5113                 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5114                                 __func__, k, TimePerChromaMetaChunkFlip[k]);
5115                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5116                                 __func__, k, time_per_pte_group_nom_luma[k]);
5117                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5118                                 __func__, k, time_per_pte_group_vblank_luma[k]);
5119                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5120                                 __func__, k, time_per_pte_group_flip_luma[k]);
5121                 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5122                                 __func__, k, time_per_pte_group_nom_chroma[k]);
5123                 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5124                                 __func__, k, time_per_pte_group_vblank_chroma[k]);
5125                 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5126                                 __func__, k, time_per_pte_group_flip_chroma[k]);
5127 #endif
5128         }
5129 } // CalculateMetaAndPTETimes
5130
5131 void dml32_CalculateVMGroupAndRequestTimes(
5132                 unsigned int     NumberOfActiveSurfaces,
5133                 bool     GPUVMEnable,
5134                 unsigned int     GPUVMMaxPageTableLevels,
5135                 unsigned int     HTotal[],
5136                 unsigned int     BytePerPixelC[],
5137                 double      DestinationLinesToRequestVMInVBlank[],
5138                 double      DestinationLinesToRequestVMInImmediateFlip[],
5139                 bool     DCCEnable[],
5140                 double      PixelClock[],
5141                 unsigned int        dpte_row_width_luma_ub[],
5142                 unsigned int        dpte_row_width_chroma_ub[],
5143                 unsigned int     vm_group_bytes[],
5144                 unsigned int     dpde0_bytes_per_frame_ub_l[],
5145                 unsigned int     dpde0_bytes_per_frame_ub_c[],
5146                 unsigned int        meta_pte_bytes_per_frame_ub_l[],
5147                 unsigned int        meta_pte_bytes_per_frame_ub_c[],
5148
5149                 /* Output */
5150                 double      TimePerVMGroupVBlank[],
5151                 double      TimePerVMGroupFlip[],
5152                 double      TimePerVMRequestVBlank[],
5153                 double      TimePerVMRequestFlip[])
5154 {
5155         unsigned int k;
5156         unsigned int   num_group_per_lower_vm_stage;
5157         unsigned int   num_req_per_lower_vm_stage;
5158
5159 #ifdef __DML_VBA_DEBUG__
5160         dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
5161         dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
5162 #endif
5163         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5164
5165 #ifdef __DML_VBA_DEBUG__
5166                 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
5167                 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
5168                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
5169                                 __func__, k, dpde0_bytes_per_frame_ub_l[k]);
5170                 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
5171                                 __func__, k, dpde0_bytes_per_frame_ub_c[k]);
5172                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
5173                                 __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
5174                 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
5175                                 __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
5176 #endif
5177
5178                 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
5179                         if (DCCEnable[k] == false) {
5180                                 if (BytePerPixelC[k] > 0) {
5181                                         num_group_per_lower_vm_stage = dml_ceil(
5182                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5183                                                         (double) (vm_group_bytes[k]), 1.0) +
5184                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5185                                                         (double) (vm_group_bytes[k]), 1.0);
5186                                 } else {
5187                                         num_group_per_lower_vm_stage = dml_ceil(
5188                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5189                                                         (double) (vm_group_bytes[k]), 1.0);
5190                                 }
5191                         } else {
5192                                 if (GPUVMMaxPageTableLevels == 1) {
5193                                         if (BytePerPixelC[k] > 0) {
5194                                                 num_group_per_lower_vm_stage = dml_ceil(
5195                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5196                                                         (double) (vm_group_bytes[k]), 1.0) +
5197                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5198                                                         (double) (vm_group_bytes[k]), 1.0);
5199                                         } else {
5200                                                 num_group_per_lower_vm_stage = dml_ceil(
5201                                                                 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5202                                                                 (double) (vm_group_bytes[k]), 1.0);
5203                                         }
5204                                 } else {
5205                                         if (BytePerPixelC[k] > 0) {
5206                                                 num_group_per_lower_vm_stage = 2 + dml_ceil(
5207                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5208                                                         (double) (vm_group_bytes[k]), 1) +
5209                                                         dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5210                                                         (double) (vm_group_bytes[k]), 1) +
5211                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5212                                                         (double) (vm_group_bytes[k]), 1) +
5213                                                         dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5214                                                         (double) (vm_group_bytes[k]), 1);
5215                                         } else {
5216                                                 num_group_per_lower_vm_stage = 1 + dml_ceil(
5217                                                         (double) (dpde0_bytes_per_frame_ub_l[k]) /
5218                                                         (double) (vm_group_bytes[k]), 1) + dml_ceil(
5219                                                         (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5220                                                         (double) (vm_group_bytes[k]), 1);
5221                                         }
5222                                 }
5223                         }
5224
5225                         if (DCCEnable[k] == false) {
5226                                 if (BytePerPixelC[k] > 0) {
5227                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5228                                                         dpde0_bytes_per_frame_ub_c[k] / 64;
5229                                 } else {
5230                                         num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5231                                 }
5232                         } else {
5233                                 if (GPUVMMaxPageTableLevels == 1) {
5234                                         if (BytePerPixelC[k] > 0) {
5235                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5236                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5237                                         } else {
5238                                                 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5239                                         }
5240                                 } else {
5241                                         if (BytePerPixelC[k] > 0) {
5242                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5243                                                                 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5244                                                                 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5245                                                                 meta_pte_bytes_per_frame_ub_c[k] / 64;
5246                                         } else {
5247                                                 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5248                                                                 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5249                                         }
5250                                 }
5251                         }
5252
5253                         TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5254                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5255                         TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5256                                         HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5257                         TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5258                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5259                         TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5260                                         HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5261
5262                         if (GPUVMMaxPageTableLevels > 2) {
5263                                 TimePerVMGroupVBlank[k]    = TimePerVMGroupVBlank[k] / 2;
5264                                 TimePerVMGroupFlip[k]      = TimePerVMGroupFlip[k] / 2;
5265                                 TimePerVMRequestVBlank[k]  = TimePerVMRequestVBlank[k] / 2;
5266                                 TimePerVMRequestFlip[k]    = TimePerVMRequestFlip[k] / 2;
5267                         }
5268
5269                 } else {
5270                         TimePerVMGroupVBlank[k] = 0;
5271                         TimePerVMGroupFlip[k] = 0;
5272                         TimePerVMRequestVBlank[k] = 0;
5273                         TimePerVMRequestFlip[k] = 0;
5274                 }
5275
5276 #ifdef __DML_VBA_DEBUG__
5277                 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
5278                 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
5279                 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
5280                 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
5281 #endif
5282         }
5283 } // CalculateVMGroupAndRequestTimes
5284
5285 void dml32_CalculateDCCConfiguration(
5286                 bool             DCCEnabled,
5287                 bool             DCCProgrammingAssumesScanDirectionUnknown,
5288                 enum source_format_class SourcePixelFormat,
5289                 unsigned int             SurfaceWidthLuma,
5290                 unsigned int             SurfaceWidthChroma,
5291                 unsigned int             SurfaceHeightLuma,
5292                 unsigned int             SurfaceHeightChroma,
5293                 unsigned int                nomDETInKByte,
5294                 unsigned int             RequestHeight256ByteLuma,
5295                 unsigned int             RequestHeight256ByteChroma,
5296                 enum dm_swizzle_mode     TilingFormat,
5297                 unsigned int             BytePerPixelY,
5298                 unsigned int             BytePerPixelC,
5299                 double              BytePerPixelDETY,
5300                 double              BytePerPixelDETC,
5301                 enum dm_rotation_angle   SourceRotation,
5302                 /* Output */
5303                 unsigned int        *MaxUncompressedBlockLuma,
5304                 unsigned int        *MaxUncompressedBlockChroma,
5305                 unsigned int        *MaxCompressedBlockLuma,
5306                 unsigned int        *MaxCompressedBlockChroma,
5307                 unsigned int        *IndependentBlockLuma,
5308                 unsigned int        *IndependentBlockChroma)
5309 {
5310         typedef enum {
5311                 REQ_256Bytes,
5312                 REQ_128BytesNonContiguous,
5313                 REQ_128BytesContiguous,
5314                 REQ_NA
5315         } RequestType;
5316
5317         RequestType   RequestLuma;
5318         RequestType   RequestChroma;
5319
5320         unsigned int   segment_order_horz_contiguous_luma;
5321         unsigned int   segment_order_horz_contiguous_chroma;
5322         unsigned int   segment_order_vert_contiguous_luma;
5323         unsigned int   segment_order_vert_contiguous_chroma;
5324         unsigned int req128_horz_wc_l;
5325         unsigned int req128_horz_wc_c;
5326         unsigned int req128_vert_wc_l;
5327         unsigned int req128_vert_wc_c;
5328         unsigned int MAS_vp_horz_limit;
5329         unsigned int MAS_vp_vert_limit;
5330         unsigned int max_vp_horz_width;
5331         unsigned int max_vp_vert_height;
5332         unsigned int eff_surf_width_l;
5333         unsigned int eff_surf_width_c;
5334         unsigned int eff_surf_height_l;
5335         unsigned int eff_surf_height_c;
5336         unsigned int full_swath_bytes_horz_wc_l;
5337         unsigned int full_swath_bytes_horz_wc_c;
5338         unsigned int full_swath_bytes_vert_wc_l;
5339         unsigned int full_swath_bytes_vert_wc_c;
5340         unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5341
5342         unsigned int   yuv420;
5343         unsigned int   horz_div_l;
5344         unsigned int   horz_div_c;
5345         unsigned int   vert_div_l;
5346         unsigned int   vert_div_c;
5347
5348         unsigned int     swath_buf_size;
5349         double   detile_buf_vp_horz_limit;
5350         double   detile_buf_vp_vert_limit;
5351
5352         yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5353                         SourcePixelFormat == dm_420_12) ? 1 : 0);
5354         horz_div_l = 1;
5355         horz_div_c = 1;
5356         vert_div_l = 1;
5357         vert_div_c = 1;
5358
5359         if (BytePerPixelY == 1)
5360                 vert_div_l = 0;
5361         if (BytePerPixelC == 1)
5362                 vert_div_c = 0;
5363
5364         if (BytePerPixelC == 0) {
5365                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5366                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5367                                 BytePerPixelY / (1 + horz_div_l));
5368                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5369                                 (1 + vert_div_l));
5370         } else {
5371                 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5372                 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5373                                 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5374                                 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5375                 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5376                                 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5377                                 (1 + vert_div_c) / (1 + yuv420));
5378         }
5379
5380         if (SourcePixelFormat == dm_420_10) {
5381                 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5382                 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5383         }
5384
5385         detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5386         detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5387
5388         MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5389         MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5390         max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5391         max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5392         eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5393         eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5394         eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5395         eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5396
5397         full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5398         full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5399         if (BytePerPixelC > 0) {
5400                 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5401                 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5402         } else {
5403                 full_swath_bytes_horz_wc_c = 0;
5404                 full_swath_bytes_vert_wc_c = 0;
5405         }
5406
5407         if (SourcePixelFormat == dm_420_10) {
5408                 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5409                 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5410                 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5411                 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5412         }
5413
5414         if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5415                 req128_horz_wc_l = 0;
5416                 req128_horz_wc_c = 0;
5417         } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5418                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5419                 req128_horz_wc_l = 0;
5420                 req128_horz_wc_c = 1;
5421         } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5422                         full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5423                 req128_horz_wc_l = 1;
5424                 req128_horz_wc_c = 0;
5425         } else {
5426                 req128_horz_wc_l = 1;
5427                 req128_horz_wc_c = 1;
5428         }
5429
5430         if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5431                 req128_vert_wc_l = 0;
5432                 req128_vert_wc_c = 0;
5433         } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5434                         full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5435                 req128_vert_wc_l = 0;
5436                 req128_vert_wc_c = 1;
5437         } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5438                         full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5439                 req128_vert_wc_l = 1;
5440                 req128_vert_wc_c = 0;
5441         } else {
5442                 req128_vert_wc_l = 1;
5443                 req128_vert_wc_c = 1;
5444         }
5445
5446         if (BytePerPixelY == 2) {
5447                 segment_order_horz_contiguous_luma = 0;
5448                 segment_order_vert_contiguous_luma = 1;
5449         } else {
5450                 segment_order_horz_contiguous_luma = 1;
5451                 segment_order_vert_contiguous_luma = 0;
5452         }
5453
5454         if (BytePerPixelC == 2) {
5455                 segment_order_horz_contiguous_chroma = 0;
5456                 segment_order_vert_contiguous_chroma = 1;
5457         } else {
5458                 segment_order_horz_contiguous_chroma = 1;
5459                 segment_order_vert_contiguous_chroma = 0;
5460         }
5461 #ifdef __DML_VBA_DEBUG__
5462         dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5463         dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5464         dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5465         dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5466         dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5467         dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5468         dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5469         dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5470         dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5471                         __func__, segment_order_horz_contiguous_chroma);
5472 #endif
5473
5474         if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5475                 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5476                         RequestLuma = REQ_256Bytes;
5477                 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5478                                 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5479                         RequestLuma = REQ_128BytesNonContiguous;
5480                 else
5481                         RequestLuma = REQ_128BytesContiguous;
5482
5483                 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5484                         RequestChroma = REQ_256Bytes;
5485                 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5486                                 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5487                         RequestChroma = REQ_128BytesNonContiguous;
5488                 else
5489                         RequestChroma = REQ_128BytesContiguous;
5490
5491         } else if (!IsVertical(SourceRotation)) {
5492                 if (req128_horz_wc_l == 0)
5493                         RequestLuma = REQ_256Bytes;
5494                 else if (segment_order_horz_contiguous_luma == 0)
5495                         RequestLuma = REQ_128BytesNonContiguous;
5496                 else
5497                         RequestLuma = REQ_128BytesContiguous;
5498
5499                 if (req128_horz_wc_c == 0)
5500                         RequestChroma = REQ_256Bytes;
5501                 else if (segment_order_horz_contiguous_chroma == 0)
5502                         RequestChroma = REQ_128BytesNonContiguous;
5503                 else
5504                         RequestChroma = REQ_128BytesContiguous;
5505
5506         } else {
5507                 if (req128_vert_wc_l == 0)
5508                         RequestLuma = REQ_256Bytes;
5509                 else if (segment_order_vert_contiguous_luma == 0)
5510                         RequestLuma = REQ_128BytesNonContiguous;
5511                 else
5512                         RequestLuma = REQ_128BytesContiguous;
5513
5514                 if (req128_vert_wc_c == 0)
5515                         RequestChroma = REQ_256Bytes;
5516                 else if (segment_order_vert_contiguous_chroma == 0)
5517                         RequestChroma = REQ_128BytesNonContiguous;
5518                 else
5519                         RequestChroma = REQ_128BytesContiguous;
5520         }
5521
5522         if (RequestLuma == REQ_256Bytes) {
5523                 *MaxUncompressedBlockLuma = 256;
5524                 *MaxCompressedBlockLuma = 256;
5525                 *IndependentBlockLuma = 0;
5526         } else if (RequestLuma == REQ_128BytesContiguous) {
5527                 *MaxUncompressedBlockLuma = 256;
5528                 *MaxCompressedBlockLuma = 128;
5529                 *IndependentBlockLuma = 128;
5530         } else {
5531                 *MaxUncompressedBlockLuma = 256;
5532                 *MaxCompressedBlockLuma = 64;
5533                 *IndependentBlockLuma = 64;
5534         }
5535
5536         if (RequestChroma == REQ_256Bytes) {
5537                 *MaxUncompressedBlockChroma = 256;
5538                 *MaxCompressedBlockChroma = 256;
5539                 *IndependentBlockChroma = 0;
5540         } else if (RequestChroma == REQ_128BytesContiguous) {
5541                 *MaxUncompressedBlockChroma = 256;
5542                 *MaxCompressedBlockChroma = 128;
5543                 *IndependentBlockChroma = 128;
5544         } else {
5545                 *MaxUncompressedBlockChroma = 256;
5546                 *MaxCompressedBlockChroma = 64;
5547                 *IndependentBlockChroma = 64;
5548         }
5549
5550         if (DCCEnabled != true || BytePerPixelC == 0) {
5551                 *MaxUncompressedBlockChroma = 0;
5552                 *MaxCompressedBlockChroma = 0;
5553                 *IndependentBlockChroma = 0;
5554         }
5555
5556         if (DCCEnabled != true) {
5557                 *MaxUncompressedBlockLuma = 0;
5558                 *MaxCompressedBlockLuma = 0;
5559                 *IndependentBlockLuma = 0;
5560         }
5561
5562 #ifdef __DML_VBA_DEBUG__
5563         dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5564         dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5565         dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5566         dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5567         dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5568         dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5569 #endif
5570
5571 } // CalculateDCCConfiguration
5572
5573 void dml32_CalculateStutterEfficiency(
5574                 unsigned int      CompressedBufferSizeInkByte,
5575                 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5576                 bool   UnboundedRequestEnabled,
5577                 unsigned int      MetaFIFOSizeInKEntries,
5578                 unsigned int      ZeroSizeBufferEntries,
5579                 unsigned int      PixelChunkSizeInKByte,
5580                 unsigned int   NumberOfActiveSurfaces,
5581                 unsigned int      ROBBufferSizeInKByte,
5582                 double    TotalDataReadBandwidth,
5583                 double    DCFCLK,
5584                 double    ReturnBW,
5585                 unsigned int      CompbufReservedSpace64B,
5586                 unsigned int      CompbufReservedSpaceZs,
5587                 double    SRExitTime,
5588                 double    SRExitZ8Time,
5589                 bool   SynchronizeTimingsFinal,
5590                 unsigned int   BlendingAndTiming[],
5591                 double    StutterEnterPlusExitWatermark,
5592                 double    Z8StutterEnterPlusExitWatermark,
5593                 bool   ProgressiveToInterlaceUnitInOPP,
5594                 bool   Interlace[],
5595                 double    MinTTUVBlank[],
5596                 unsigned int   DPPPerSurface[],
5597                 unsigned int      DETBufferSizeY[],
5598                 unsigned int   BytePerPixelY[],
5599                 double    BytePerPixelDETY[],
5600                 double      SwathWidthY[],
5601                 unsigned int   SwathHeightY[],
5602                 unsigned int   SwathHeightC[],
5603                 double    NetDCCRateLuma[],
5604                 double    NetDCCRateChroma[],
5605                 double    DCCFractionOfZeroSizeRequestsLuma[],
5606                 double    DCCFractionOfZeroSizeRequestsChroma[],
5607                 unsigned int      HTotal[],
5608                 unsigned int      VTotal[],
5609                 double    PixelClock[],
5610                 double    VRatio[],
5611                 enum dm_rotation_angle SourceRotation[],
5612                 unsigned int   BlockHeight256BytesY[],
5613                 unsigned int   BlockWidth256BytesY[],
5614                 unsigned int   BlockHeight256BytesC[],
5615                 unsigned int   BlockWidth256BytesC[],
5616                 unsigned int   DCCYMaxUncompressedBlock[],
5617                 unsigned int   DCCCMaxUncompressedBlock[],
5618                 unsigned int      VActive[],
5619                 bool   DCCEnable[],
5620                 bool   WritebackEnable[],
5621                 double    ReadBandwidthSurfaceLuma[],
5622                 double    ReadBandwidthSurfaceChroma[],
5623                 double    meta_row_bw[],
5624                 double    dpte_row_bw[],
5625
5626                 /* Output */
5627                 double   *StutterEfficiencyNotIncludingVBlank,
5628                 double   *StutterEfficiency,
5629                 unsigned int     *NumberOfStutterBurstsPerFrame,
5630                 double   *Z8StutterEfficiencyNotIncludingVBlank,
5631                 double   *Z8StutterEfficiency,
5632                 unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5633                 double   *StutterPeriod,
5634                 bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5635 {
5636
5637         bool FoundCriticalSurface = false;
5638         unsigned int SwathSizeCriticalSurface = 0;
5639         unsigned int LastChunkOfSwathSize;
5640         unsigned int MissingPartOfLastSwathOfDETSize;
5641         double LastZ8StutterPeriod = 0.0;
5642         double LastStutterPeriod = 0.0;
5643         unsigned int TotalNumberOfActiveOTG = 0;
5644         double doublePixelClock;
5645         unsigned int doubleHTotal;
5646         unsigned int doubleVTotal;
5647         bool SameTiming = true;
5648         double DETBufferingTimeY;
5649         double SwathWidthYCriticalSurface = 0.0;
5650         double SwathHeightYCriticalSurface = 0.0;
5651         double VActiveTimeCriticalSurface = 0.0;
5652         double FrameTimeCriticalSurface = 0.0;
5653         unsigned int BytePerPixelYCriticalSurface = 0;
5654         double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5655         unsigned int DETBufferSizeYCriticalSurface = 0;
5656         double MinTTUVBlankCriticalSurface = 0.0;
5657         unsigned int BlockWidth256BytesYCriticalSurface = 0;
5658         bool doublePlaneCriticalSurface = 0;
5659         bool doublePipeCriticalSurface = 0;
5660         double TotalCompressedReadBandwidth;
5661         double TotalRowReadBandwidth;
5662         double AverageDCCCompressionRate;
5663         double EffectiveCompressedBufferSize;
5664         double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5665         double StutterBurstTime;
5666         unsigned int TotalActiveWriteback;
5667         double LinesInDETY;
5668         double LinesInDETYRoundedDownToSwath;
5669         double MaximumEffectiveCompressionLuma;
5670         double MaximumEffectiveCompressionChroma;
5671         double TotalZeroSizeRequestReadBandwidth;
5672         double TotalZeroSizeCompressedReadBandwidth;
5673         double AverageDCCZeroSizeFraction;
5674         double AverageZeroSizeCompressionRate;
5675         unsigned int k;
5676
5677         TotalZeroSizeRequestReadBandwidth = 0;
5678         TotalZeroSizeCompressedReadBandwidth = 0;
5679         TotalRowReadBandwidth = 0;
5680         TotalCompressedReadBandwidth = 0;
5681
5682         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5683                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5684                         if (DCCEnable[k] == true) {
5685                                 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5686                                                 || (!IsVertical(SourceRotation[k])
5687                                                                 && BlockHeight256BytesY[k] > SwathHeightY[k])
5688                                                 || DCCYMaxUncompressedBlock[k] < 256) {
5689                                         MaximumEffectiveCompressionLuma = 2;
5690                                 } else {
5691                                         MaximumEffectiveCompressionLuma = 4;
5692                                 }
5693                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5694                                                 + ReadBandwidthSurfaceLuma[k]
5695                                                                 / dml_min(NetDCCRateLuma[k],
5696                                                                                 MaximumEffectiveCompressionLuma);
5697 #ifdef __DML_VBA_DEBUG__
5698                                 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5699                                                 __func__, k, ReadBandwidthSurfaceLuma[k]);
5700                                 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5701                                                 __func__, k, NetDCCRateLuma[k]);
5702                                 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5703                                                 __func__, k, MaximumEffectiveCompressionLuma);
5704 #endif
5705                                 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5706                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5707                                 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5708                                                 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5709                                                                 / MaximumEffectiveCompressionLuma;
5710
5711                                 if (ReadBandwidthSurfaceChroma[k] > 0) {
5712                                         if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5713                                                         || (!IsVertical(SourceRotation[k])
5714                                                                         && BlockHeight256BytesC[k] > SwathHeightC[k])
5715                                                         || DCCCMaxUncompressedBlock[k] < 256) {
5716                                                 MaximumEffectiveCompressionChroma = 2;
5717                                         } else {
5718                                                 MaximumEffectiveCompressionChroma = 4;
5719                                         }
5720                                         TotalCompressedReadBandwidth =
5721                                                         TotalCompressedReadBandwidth
5722                                                         + ReadBandwidthSurfaceChroma[k]
5723                                                         / dml_min(NetDCCRateChroma[k],
5724                                                         MaximumEffectiveCompressionChroma);
5725 #ifdef __DML_VBA_DEBUG__
5726                                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5727                                                         __func__, k, ReadBandwidthSurfaceChroma[k]);
5728                                         dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5729                                                         __func__, k, NetDCCRateChroma[k]);
5730                                         dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5731                                                         __func__, k, MaximumEffectiveCompressionChroma);
5732 #endif
5733                                         TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5734                                                         + ReadBandwidthSurfaceChroma[k]
5735                                                                         * DCCFractionOfZeroSizeRequestsChroma[k];
5736                                         TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5737                                                         + ReadBandwidthSurfaceChroma[k]
5738                                                                         * DCCFractionOfZeroSizeRequestsChroma[k]
5739                                                                         / MaximumEffectiveCompressionChroma;
5740                                 }
5741                         } else {
5742                                 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5743                                                 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5744                         }
5745                         TotalRowReadBandwidth = TotalRowReadBandwidth
5746                                         + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5747                 }
5748         }
5749
5750         AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5751         AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5752
5753 #ifdef __DML_VBA_DEBUG__
5754         dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5755         dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5756         dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5757         dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5758                         __func__, TotalZeroSizeCompressedReadBandwidth);
5759         dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5760         dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5761         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5762         dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5763         dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5764         dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5765         dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5766 #endif
5767         if (AverageDCCZeroSizeFraction == 1) {
5768                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5769                                 / TotalZeroSizeCompressedReadBandwidth;
5770                 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5771                                 * AverageZeroSizeCompressionRate
5772                                 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5773                                                 * AverageZeroSizeCompressionRate;
5774         } else if (AverageDCCZeroSizeFraction > 0) {
5775                 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5776                                 / TotalZeroSizeCompressedReadBandwidth;
5777                 EffectiveCompressedBufferSize = dml_min(
5778                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5779                                 (double) MetaFIFOSizeInKEntries * 1024 * 64
5780                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5781                                         + 1 / AverageDCCCompressionRate))
5782                                         + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5783                                         * AverageDCCCompressionRate,
5784                                         ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5785                                         / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5786
5787 #ifdef __DML_VBA_DEBUG__
5788                 dml_print("DML::%s: min 1 = %f\n", __func__,
5789                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5790                 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5791                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5792                                                 AverageDCCCompressionRate));
5793                 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5794                                 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5795                 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5796                                 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5797 #endif
5798         } else {
5799                 EffectiveCompressedBufferSize = dml_min(
5800                                 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5801                                 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5802                                 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5803                                                 * AverageDCCCompressionRate;
5804
5805 #ifdef __DML_VBA_DEBUG__
5806                 dml_print("DML::%s: min 1 = %f\n", __func__,
5807                                 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5808                 dml_print("DML::%s: min 2 = %f\n", __func__,
5809                                 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5810 #endif
5811         }
5812
5813 #ifdef __DML_VBA_DEBUG__
5814         dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5815         dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5816         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5817 #endif
5818
5819         *StutterPeriod = 0;
5820
5821         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5822                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5823                         LinesInDETY = ((double) DETBufferSizeY[k]
5824                                         + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5825                                                         * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5826                                         / BytePerPixelDETY[k] / SwathWidthY[k];
5827                         LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5828                         DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5829                                         / VRatio[k];
5830 #ifdef __DML_VBA_DEBUG__
5831                         dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5832                         dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5833                         dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5834                         dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5835                                         __func__, k, ReadBandwidthSurfaceLuma[k]);
5836                         dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5837                         dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5838                         dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5839                                         __func__, k, LinesInDETYRoundedDownToSwath);
5840                         dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5841                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5842                         dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5843                         dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5844                         dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5845 #endif
5846
5847                         if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5848                                 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5849
5850                                 FoundCriticalSurface = true;
5851                                 *StutterPeriod = DETBufferingTimeY;
5852                                 FrameTimeCriticalSurface = (
5853                                                 isInterlaceTiming ?
5854                                                                 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5855                                                 * (double) HTotal[k] / PixelClock[k];
5856                                 VActiveTimeCriticalSurface = (
5857                                                 isInterlaceTiming ?
5858                                                                 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5859                                                 * (double) HTotal[k] / PixelClock[k];
5860                                 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5861                                 SwathWidthYCriticalSurface = SwathWidthY[k];
5862                                 SwathHeightYCriticalSurface = SwathHeightY[k];
5863                                 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5864                                 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5865                                                 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5866                                 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5867                                 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5868                                 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5869                                 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5870
5871 #ifdef __DML_VBA_DEBUG__
5872                                 dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5873                                                 __func__, k, FoundCriticalSurface);
5874                                 dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5875                                                 __func__, k, *StutterPeriod);
5876                                 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5877                                                 __func__, k, MinTTUVBlankCriticalSurface);
5878                                 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5879                                                 __func__, k, FrameTimeCriticalSurface);
5880                                 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5881                                                 __func__, k, VActiveTimeCriticalSurface);
5882                                 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5883                                                 __func__, k, BytePerPixelYCriticalSurface);
5884                                 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5885                                                 __func__, k, SwathWidthYCriticalSurface);
5886                                 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5887                                                 __func__, k, SwathHeightYCriticalSurface);
5888                                 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5889                                                 __func__, k, BlockWidth256BytesYCriticalSurface);
5890                                 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5891                                                 __func__, k, doublePlaneCriticalSurface);
5892                                 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5893                                                 __func__, k, doublePipeCriticalSurface);
5894                                 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5895                                                 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5896 #endif
5897                         }
5898                 }
5899         }
5900
5901         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5902                         EffectiveCompressedBufferSize);
5903 #ifdef __DML_VBA_DEBUG__
5904         dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5905         dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5906         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5907                         __func__, *StutterPeriod * TotalDataReadBandwidth);
5908         dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5909         dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5910                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5911         dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5912         dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5913         dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5914         dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5915 #endif
5916
5917         StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5918                         / ReturnBW
5919                         + (*StutterPeriod * TotalDataReadBandwidth
5920                                         - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5921                         + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5922 #ifdef __DML_VBA_DEBUG__
5923         dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5924                         AverageDCCCompressionRate / ReturnBW);
5925         dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5926                         __func__, (*StutterPeriod * TotalDataReadBandwidth));
5927         dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5928                         PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5929         dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5930         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5931 #endif
5932         StutterBurstTime = dml_max(StutterBurstTime,
5933                         LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5934                                         * SwathWidthYCriticalSurface / ReturnBW);
5935
5936 #ifdef __DML_VBA_DEBUG__
5937         dml_print("DML::%s: Time to finish residue swath=%f\n",
5938                         __func__,
5939                         LinesToFinishSwathTransferStutterCriticalSurface *
5940                         BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5941 #endif
5942
5943         TotalActiveWriteback = 0;
5944         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5945                 if (WritebackEnable[k])
5946                         TotalActiveWriteback = TotalActiveWriteback + 1;
5947         }
5948
5949         if (TotalActiveWriteback == 0) {
5950 #ifdef __DML_VBA_DEBUG__
5951                 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5952                 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5953                 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5954                 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5955 #endif
5956                 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5957                                 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5958                 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5959                                 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5960                 *NumberOfStutterBurstsPerFrame = (
5961                                 *StutterEfficiencyNotIncludingVBlank > 0 ?
5962                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5963                 *Z8NumberOfStutterBurstsPerFrame = (
5964                                 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5965                                                 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5966         } else {
5967                 *StutterEfficiencyNotIncludingVBlank = 0.;
5968                 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5969                 *NumberOfStutterBurstsPerFrame = 0;
5970                 *Z8NumberOfStutterBurstsPerFrame = 0;
5971         }
5972 #ifdef __DML_VBA_DEBUG__
5973         dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5974         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5975                         __func__, *StutterEfficiencyNotIncludingVBlank);
5976         dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5977                         __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5978         dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5979         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5980 #endif
5981
5982         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5983                 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5984                         if (BlendingAndTiming[k] == k) {
5985                                 if (TotalNumberOfActiveOTG == 0) {
5986                                         doublePixelClock = PixelClock[k];
5987                                         doubleHTotal = HTotal[k];
5988                                         doubleVTotal = VTotal[k];
5989                                 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5990                                                 || doubleVTotal != VTotal[k]) {
5991                                         SameTiming = false;
5992                                 }
5993                                 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5994                         }
5995                 }
5996         }
5997
5998         if (*StutterEfficiencyNotIncludingVBlank > 0) {
5999                 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6000
6001                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6002                                 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6003                         *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6004                                                 + StutterBurstTime * VActiveTimeCriticalSurface
6005                                                 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6006                 } else {
6007                         *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6008                 }
6009         } else {
6010                 *StutterEfficiency = 0;
6011         }
6012
6013         if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6014                 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6015                                 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6016                 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6017                                 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6018                         *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6019                                 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6020                 } else {
6021                         *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6022                 }
6023         } else {
6024                 *Z8StutterEfficiency = 0.;
6025         }
6026
6027 #ifdef __DML_VBA_DEBUG__
6028         dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6029         dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6030         dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6031         dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6032         dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6033         dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6034         dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6035                         __func__, *StutterEfficiencyNotIncludingVBlank);
6036         dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6037 #endif
6038
6039         SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6040                         * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6041         LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6042         MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6043                         - DETBufferSizeYCriticalSurface;
6044
6045         *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6046                         && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6047                         && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6048                         && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6049
6050 #ifdef __DML_VBA_DEBUG__
6051         dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6052         dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6053         dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6054         dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6055 #endif
6056 } // CalculateStutterEfficiency
6057
/*
 * dml32_CalculateMaxDETAndMinCompressedBufferSize
 *
 * Derives three sizes (all in KByte) from the return buffer configuration:
 *
 *   *MaxTotalDETInKByte             = 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                     ROBBufferSizeInKByte), rounded up with
 *                                     dml_ceil(..., 64).
 *   *nomDETInKByte                  = *MaxTotalDETInKByte / MaxNumDPP, rounded
 *                                     down with dml_floor(..., 64), unless
 *                                     nomDETInKByteOverrideEnable is set, in
 *                                     which case nomDETInKByteOverrideValue is
 *                                     stored instead.
 *   *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte -
 *                                     *MaxTotalDETInKByte (unsigned arithmetic).
 *
 * NOTE(review): MaxNumDPP is used as a divisor and is not checked here;
 * presumably callers always pass a non-zero value - confirm.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	/* Combined size of the configurable return buffer and the ROB. */
	const double CombinedBufferInKByte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(CombinedBufferInKByte * 4.0 / 5.0, 64);

	/* The nominal value is derived from the already-stored (integer) total. */
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);

	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller asked for an explicit override. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6094
/*
 * dml32_CalculateVActiveBandwithSupport
 *
 * Sums, over the first NumberOfActiveSurfaces entries of the input arrays:
 *
 *   ReadBandwidthLuma[k]   * UrgentBurstFactorLuma[k]
 * + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 * + cursor_bw[k]           * UrgentBurstFactorCursor[k]
 * + NumberOfDPP[k]         * meta_row_bandwidth[k]
 * + NumberOfDPP[k]         * dpte_row_bandwidth[k]
 *
 * Return: true only when that sum is <= ReturnBW and none of the
 * NotUrgentLatencyHiding[] flags is set; false otherwise.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int i;
	bool AnySurfaceLacksHiding = false;
	bool BandwidthSupported;
	double TotalBandwidth = 0;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		if (NotUrgentLatencyHiding[i])
			AnySurfaceLacksHiding = true;

		/*
		 * Terms are added strictly one after another, left to right;
		 * reordering them could change floating-point rounding.
		 */
		TotalBandwidth = TotalBandwidth
				+ ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i];
		TotalBandwidth = TotalBandwidth
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i];
		TotalBandwidth = TotalBandwidth
				+ cursor_bw[i] * UrgentBurstFactorCursor[i];
		TotalBandwidth = TotalBandwidth
				+ NumberOfDPP[i] * meta_row_bandwidth[i];
		TotalBandwidth = TotalBandwidth
				+ NumberOfDPP[i] * dpte_row_bandwidth[i];
	}

	BandwidthSupported = !AnySurfaceLacksHiding && (TotalBandwidth <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, AnySurfaceLacksHiding);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, TotalBandwidth);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, BandwidthSupported);
#endif
	return BandwidthSupported;
}
6133
/*
 * dml32_CalculatePrefetchBandwithSupport
 *
 * For each of the first NumberOfActiveSurfaces entries, takes the largest
 * (via dml_max3) of three candidate bandwidths and accumulates it:
 *
 *   1. NumberOfDPP[k] * prefetch_vmrow_bw[k]
 *   2. ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *      + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *      + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *      + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
 *   3. NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *      + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *      + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated sum.
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no
 *                                NotUrgentLatencyHiding[] flag is set.
 *   *FractionOfUrgentBandwidth - the sum divided by ReturnBW.
 *
 * NOTE(review): ReturnBW is used as a divisor without a check; presumably it
 * is always positive - confirm against callers.
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int i;
	bool AnySurfaceLacksHiding = false;
	double TotalPrefetchBandwidth = 0;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		double VMRowBandwidth;
		double ActiveBandwidth;
		double PrefetchDataBandwidth;

		if (NotUrgentLatencyHiding[i])
			AnySurfaceLacksHiding = true;

		/* Candidate 1: VM/row prefetch traffic across all DPPs. */
		VMRowBandwidth = NumberOfDPP[i] * prefetch_vmrow_bw[i];

		/* Candidate 2: active read traffic plus row fetches. */
		ActiveBandwidth = ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i]
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i]
				+ cursor_bw[i] * UrgentBurstFactorCursor[i]
				+ NumberOfDPP[i] * (meta_row_bandwidth[i] + dpte_row_bandwidth[i]);

		/* Candidate 3: prefetch data traffic plus prefetch cursor traffic. */
		PrefetchDataBandwidth = NumberOfDPP[i]
				* (PrefetchBandwidthLuma[i] * UrgentBurstFactorLumaPre[i]
				+ PrefetchBandwidthChroma[i] * UrgentBurstFactorChromaPre[i])
				+ cursor_bw_pre[i] * UrgentBurstFactorCursorPre[i];

		TotalPrefetchBandwidth = TotalPrefetchBandwidth
				+ dml_max3(VMRowBandwidth, ActiveBandwidth, PrefetchDataBandwidth);
	}

	*PrefetchBandwidth = TotalPrefetchBandwidth;
	*PrefetchBandwidthSupport = !AnySurfaceLacksHiding && (TotalPrefetchBandwidth <= ReturnBW);
	*FractionOfUrgentBandwidth = TotalPrefetchBandwidth / ReturnBW;
}
6177
/*
 * dml32_CalculateBandwidthAvailableForImmediateFlip
 *
 * Starts from ReturnBW and, for each of the first NumberOfActiveSurfaces
 * entries, subtracts the larger (via dml_max) of:
 *
 *   - ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
 *     + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
 *     + cursor_bw[k] * UrgentBurstFactorCursor[k]
 *   - NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
 *     + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
 *     + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]
 *
 * Return: the remaining bandwidth. The result is not clamped, so it can be
 * negative when the per-surface demands exceed ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int i;
	double RemainingBandwidth = ReturnBW;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		/* Demand of the surface while it is being actively displayed. */
		double ActiveBandwidth = ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i]
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i]
				+ cursor_bw[i] * UrgentBurstFactorCursor[i];
		/* Demand of the surface computed from its prefetch inputs. */
		double PrefetchDataBandwidth = NumberOfDPP[i]
				* (PrefetchBandwidthLuma[i] * UrgentBurstFactorLumaPre[i]
				+ PrefetchBandwidthChroma[i] * UrgentBurstFactorChromaPre[i])
				+ cursor_bw_pre[i] * UrgentBurstFactorCursorPre[i];

		RemainingBandwidth = RemainingBandwidth
				- dml_max(ActiveBandwidth, PrefetchDataBandwidth);
	}

	return RemainingBandwidth;
}
6204
6205 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6206                 double ReturnBW,
6207                 enum immediate_flip_requirement ImmediateFlipRequirement[],
6208                 double final_flip_bw[],
6209                 double ReadBandwidthLuma[],
6210                 double ReadBandwidthChroma[],
6211                 double PrefetchBandwidthLuma[],
6212                 double PrefetchBandwidthChroma[],
6213                 double cursor_bw[],
6214                 double meta_row_bandwidth[],
6215                 double dpte_row_bandwidth[],
6216                 double cursor_bw_pre[],
6217                 double prefetch_vmrow_bw[],
6218                 unsigned int NumberOfDPP[],
6219                 double UrgentBurstFactorLuma[],
6220                 double UrgentBurstFactorChroma[],
6221                 double UrgentBurstFactorCursor[],
6222                 double UrgentBurstFactorLumaPre[],
6223                 double UrgentBurstFactorChromaPre[],
6224                 double UrgentBurstFactorCursorPre[],
6225
6226                 /* output */
6227                 double  *TotalBandwidth,
6228                 double  *FractionOfUrgentBandwidth,
6229                 bool *ImmediateFlipBandwidthSupport)
6230 {
6231         unsigned int k;
6232         *TotalBandwidth = 0;
6233         for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6234                 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6235                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6236                                         NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6237                                         NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6238                 } else {
6239                         *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6240                                         NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6241                                         NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6242                 }
6243         }
6244         *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6245         *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6246 }