2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
27 #include "../display_mode_lib.h"
28 #include "../dcn30/display_mode_vba_30.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
// NOTE: BPP_BLENDED_PIPE is defined a second time, with the identical value, just before the forward declarations below.
42 #define BPP_BLENDED_PIPE 0xffffffff
// NOTE(review): 5184 and 4096 match the slice-width limits quoted in the dscceComputeDelay() comments; presumably these are those limits -- confirm.
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
45 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128
46 #define DCN3_15_MAX_DET_SIZE 384
48 // For DML-C changes that haven't been propagated to VBA yet.
// Left commented out here, so the `#ifdef __DML_VBA_ALLOW_DELTA__` branches in this file stay disabled unless the macro is defined elsewhere.
49 //#define __DML_VBA_ALLOW_DELTA__
51 // Move these to ip parameters/constant
53 // The vstartup value at which DML starts trying to see whether the mode can be supported
54 #define __DML_VBA_MIN_VSTARTUP__ 9
56 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
57 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
59 // fudge factor for min dcfclk calculation
60 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
// Members with these names are read through `myPipe->` in CalculatePrefetchSchedule()
// (e.g. DCFCLKDeepSleep, InterlaceEnable, NumberOfCursors, DCCEnable, SourcePixelFormat).
// NOTE(review): presumably this is the aggregate myPipe points to -- confirm against its declaration.
66 double DCFCLKDeepSleep;
67 unsigned int DPPPerPlane;
71 enum scan_direction_class SourceScan;
72 unsigned int BlockWidth256BytesY;
73 unsigned int BlockHeight256BytesY;
74 unsigned int BlockWidth256BytesC;
75 unsigned int BlockHeight256BytesC;
// InterlaceEnable and DCCEnable are unsigned int, yet the code in this file tests them like flags
// (e.g. `myPipe->DCCEnable == true`, `!myPipe->DCCEnable`).
76 unsigned int InterlaceEnable;
77 unsigned int NumberOfCursors;
80 unsigned int DCCEnable;
81 bool ODMCombineIsEnabled;
82 enum source_format_class SourcePixelFormat;
85 bool ProgressiveToInterlaceUnitInOPP;
// Repeats the BPP_BLENDED_PIPE definition from the top of this file; the replacement list is identical,
// so this is a benign redefinition rather than a conflicting one.
89 #define BPP_BLENDED_PIPE 0xffffffff
// Forward declarations of two file-local stages; dml31_recalculate() calls them in this order,
// each with the same mode_lib pointer.
91 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
92 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
// Forward declaration; the definition follows dml31_recalculate() further down in this file.
93 static unsigned int dscceComputeDelay(
96 unsigned int sliceWidth,
97 unsigned int numSlices,
98 enum output_format_class pixelFormat,
99 enum output_encoder_class Output);
// Forward declaration; the definition follows dscceComputeDelay() further down in this file.
100 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
/*
 * Forward declaration; the parameter list mirrors the definition further down in this file.
 * The definition writes through several of the pointer parameters, e.g. *DSTXAfterScaler,
 * *DSTYAfterScaler, *PrefetchBandwidth, *DestinationLinesToRequestVMInVBlank,
 * *DestinationLinesToRequestRowInVBlank, *VRatioPrefetchY, *VRatioPrefetchC,
 * *RequiredPrefetchPixDataBWLuma and *NotEnoughTimeForDynamicMetadata.
 */
101 static bool CalculatePrefetchSchedule(
102 struct display_mode_lib *mode_lib,
103 double HostVMInefficiencyFactor,
105 unsigned int DSCDelay,
106 double DPPCLKDelaySubtotalPlusCNVCFormater,
107 double DPPCLKDelaySCL,
108 double DPPCLKDelaySCLLBOnly,
109 double DPPCLKDelayCNVCCursor,
110 double DISPCLKDelaySubtotal,
111 unsigned int DPP_RECOUT_WIDTH,
112 enum output_format_class OutputFormat,
113 unsigned int MaxInterDCNTileRepeaters,
114 unsigned int VStartup,
115 unsigned int MaxVStartup,
116 unsigned int GPUVMPageTableLevels,
119 unsigned int HostVMMaxNonCachedPageTableLevels,
120 double HostVMMinPageSize,
121 bool DynamicMetadataEnable,
122 bool DynamicMetadataVMEnabled,
123 int DynamicMetadataLinesBeforeActiveRequired,
124 unsigned int DynamicMetadataTransmittedBytes,
125 double UrgentLatency,
126 double UrgentExtraLatency,
128 unsigned int PDEAndMetaPTEBytesFrame,
129 unsigned int MetaRowByte,
130 unsigned int PixelPTEBytesPerRow,
131 double PrefetchSourceLinesY,
132 unsigned int SwathWidthY,
133 double VInitPreFillY,
134 unsigned int MaxNumSwathY,
135 double PrefetchSourceLinesC,
136 unsigned int SwathWidthC,
137 double VInitPreFillC,
138 unsigned int MaxNumSwathC,
139 int swath_width_luma_ub,
140 int swath_width_chroma_ub,
141 unsigned int SwathHeightY,
142 unsigned int SwathHeightC,
144 double *DSTXAfterScaler,
145 double *DSTYAfterScaler,
146 double *DestinationLinesForPrefetch,
147 double *PrefetchBandwidth,
148 double *DestinationLinesToRequestVMInVBlank,
149 double *DestinationLinesToRequestRowInVBlank,
150 double *VRatioPrefetchY,
151 double *VRatioPrefetchC,
152 double *RequiredPrefetchPixDataBWLuma,
153 double *RequiredPrefetchPixDataBWChroma,
154 bool *NotEnoughTimeForDynamicMetadata,
156 double *prefetch_vmrow_bw,
160 int *VUpdateOffsetPix,
161 double *VUpdateWidthPix,
162 double *VReadyOffsetPix);
// Forward declarations of a pair of file-local helpers with identical signatures: (Clock, VCOSpeed) -> double.
163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
// Forward declaration of a file-local helper.
// NOTE(review): the six trailing `unsigned int *` parameters are presumably outputs -- confirm against the definition.
165 static void CalculateDCCConfiguration(
167 bool DCCProgrammingAssumesScanDirectionUnknown,
168 enum source_format_class SourcePixelFormat,
169 unsigned int SurfaceWidthLuma,
170 unsigned int SurfaceWidthChroma,
171 unsigned int SurfaceHeightLuma,
172 unsigned int SurfaceHeightChroma,
173 double DETBufferSize,
174 unsigned int RequestHeight256ByteLuma,
175 unsigned int RequestHeight256ByteChroma,
176 enum dm_swizzle_mode TilingFormat,
177 unsigned int BytePerPixelY,
178 unsigned int BytePerPixelC,
179 double BytePerPixelDETY,
180 double BytePerPixelDETC,
181 enum scan_direction_class ScanOrientation,
182 unsigned int *MaxUncompressedBlockLuma,
183 unsigned int *MaxUncompressedBlockChroma,
184 unsigned int *MaxCompressedBlockLuma,
185 unsigned int *MaxCompressedBlockChroma,
186 unsigned int *IndependentBlockLuma,
187 unsigned int *IndependentBlockChroma);
// Forward declaration of a file-local helper returning double.
// NOTE(review): VInitPreFill and MaxNumSwath are pointers, presumably written by the callee -- confirm.
188 static double CalculatePrefetchSourceLines(
189 struct display_mode_lib *mode_lib,
193 bool ProgressiveToInterlaceUnitInOPP,
194 unsigned int SwathHeight,
195 unsigned int ViewportYStart,
196 double *VInitPreFill,
197 unsigned int *MaxNumSwath);
// Forward declaration of a file-local helper returning unsigned int.
// The pointer parameters mix `unsigned int *` with `int *` (dpte_row_width_ub, DPDE0BytesFrame,
// MetaPTEBytesFrame), so callers have to pass objects of the matching signedness.
198 static unsigned int CalculateVMAndRowBytes(
199 struct display_mode_lib *mode_lib,
201 unsigned int BlockHeight256Bytes,
202 unsigned int BlockWidth256Bytes,
203 enum source_format_class SourcePixelFormat,
204 unsigned int SurfaceTiling,
205 unsigned int BytePerPixel,
206 enum scan_direction_class ScanDirection,
207 unsigned int SwathWidth,
208 unsigned int ViewportHeight,
211 unsigned int HostVMMaxNonCachedPageTableLevels,
212 unsigned int GPUVMMinPageSize,
213 unsigned int HostVMMinPageSize,
214 unsigned int PTEBufferSizeInRequests,
216 unsigned int DCCMetaPitch,
217 unsigned int *MacroTileWidth,
218 unsigned int *MetaRowByte,
219 unsigned int *PixelPTEBytesPerRow,
220 bool *PTEBufferSizeNotExceeded,
221 int *dpte_row_width_ub,
222 unsigned int *dpte_row_height,
223 unsigned int *MetaRequestWidth,
224 unsigned int *MetaRequestHeight,
225 unsigned int *meta_row_width,
226 unsigned int *meta_row_height,
228 unsigned int *dpte_group_bytes,
229 unsigned int *PixelPTEReqWidth,
230 unsigned int *PixelPTEReqHeight,
231 unsigned int *PTERequestSize,
232 int *DPDE0BytesFrame,
233 int *MetaPTEBytesFrame);
// Forward declaration of a file-local helper: takes a prefetch mode and three latency/time values, returns double.
234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
// Forward declaration of a file-local helper; takes luma and chroma variants of each row quantity.
// NOTE(review): dpte_row_bw is a pointer, presumably an output -- confirm against the definition.
235 static void CalculateRowBandwidth(
237 enum source_format_class SourcePixelFormat,
242 unsigned int MetaRowByteLuma,
243 unsigned int MetaRowByteChroma,
244 unsigned int meta_row_height_luma,
245 unsigned int meta_row_height_chroma,
246 unsigned int PixelPTEBytesPerRowLuma,
247 unsigned int PixelPTEBytesPerRowChroma,
248 unsigned int dpte_row_height_luma,
249 unsigned int dpte_row_height_chroma,
251 double *dpte_row_bw);
// Forward declaration of a file-local helper; returns void and has no pointer parameter other than mode_lib
// among those listed here.
253 static void CalculateFlipSchedule(
254 struct display_mode_lib *mode_lib,
256 double HostVMInefficiencyFactor,
257 double UrgentExtraLatency,
258 double UrgentLatency,
259 double PDEAndMetaPTEBytesPerFrame,
261 double DPTEBytesPerRow);
// Forward declaration of a file-local helper returning double; all parameters are passed by value.
262 static double CalculateWriteBackDelay(
263 enum source_format_class WritebackPixelFormat,
264 double WritebackHRatio,
265 double WritebackVRatio,
266 unsigned int WritebackVTaps,
267 int WritebackDestinationWidth,
268 int WritebackDestinationHeight,
269 int WritebackSourceHeight,
270 unsigned int HTotal);
// Forward declaration; CalculatePrefetchSchedule() calls this helper.
// NOTE(review): MaxInterDCNTileRepeaters and DynamicMetadataTransmittedBytes are `int` here but
// `unsigned int` in CalculatePrefetchSchedule()'s own parameter list, so the values it forwards
// appear to be implicitly converted at that call.
272 static void CalculateVupdateAndDynamicMetadataParameters(
273 int MaxInterDCNTileRepeaters,
276 double DCFClkDeepSleep,
280 int DynamicMetadataTransmittedBytes,
281 int DynamicMetadataLinesBeforeActiveRequired,
283 bool ProgressiveToInterlaceUnitInOPP,
288 int *VUpdateOffsetPix,
289 double *VUpdateWidthPix,
290 double *VReadyOffsetPix);
// Forward declaration of a file-local helper. The `[]` parameters are adjusted to pointers, so no
// element count travels with them.
292 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
293 struct display_mode_lib *mode_lib,
294 unsigned int PrefetchMode,
297 double UrgentLatency,
300 double DCFCLKDeepSleep,
301 unsigned int DETBufferSizeY[],
302 unsigned int DETBufferSizeC[],
303 unsigned int SwathHeightY[],
304 unsigned int SwathHeightC[],
305 double SwathWidthY[],
306 double SwathWidthC[],
307 unsigned int DPPPerPlane[],
308 double BytePerPixelDETY[],
309 double BytePerPixelDETC[],
310 bool UnboundedRequestEnabled,
// `int unsigned` is the same type as `unsigned int`; only the specifier order is unusual.
311 int unsigned CompressedBufferSizeInkByte,
312 enum clock_change_support *DRAMClockChangeSupport,
313 double *StutterExitWatermark,
314 double *StutterEnterPlusExitWatermark,
315 double *Z8StutterExitWatermark,
316 double *Z8StutterEnterPlusExitWatermark);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// NOTE(review): the final `double *DCFCLKDeepSleep` is presumably the result -- confirm against the definition.
318 static void CalculateDCFCLKDeepSleep(
319 struct display_mode_lib *mode_lib,
320 unsigned int NumberOfActivePlanes,
324 double VRatioChroma[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 unsigned int DPPPerPlane[],
329 double HRatioChroma[],
331 double PSCL_THROUGHPUT[],
332 double PSCL_THROUGHPUT_CHROMA[],
334 double ReadBandwidthLuma[],
335 double ReadBandwidthChroma[],
337 double *DCFCLKDeepSleep);
// Forward declaration of a file-local helper operating on scalar (single-plane) values.
// NOTE(review): the four trailing pointer parameters are presumably outputs -- confirm against the definition.
339 static void CalculateUrgentBurstFactor(
340 int swath_width_luma_ub,
341 int swath_width_chroma_ub,
342 unsigned int SwathHeightY,
343 unsigned int SwathHeightC,
345 double UrgentLatency,
346 double CursorBufferSize,
347 unsigned int CursorWidth,
348 unsigned int CursorBPP,
351 double BytePerPixelInDETY,
352 double BytePerPixelInDETC,
353 double DETBufferSizeY,
354 double DETBufferSizeC,
355 double *UrgentBurstFactorCursor,
356 double *UrgentBurstFactorLuma,
357 double *UrgentBurstFactorChroma,
358 bool *NotEnoughUrgentLatencyHiding);
// Forward declaration of a file-local helper; returns void.
360 static void UseMinimumDCFCLK(
361 struct display_mode_lib *mode_lib,
363 int ReorderingBytes);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// CursorWidth and CursorBPP are two-dimensional: each row has DC__NUM_CURSOR__MAX entries.
// NOTE(review): the trailing `double ...[]` arrays from DisplayPipeLineDeliveryTimeLuma onward are
// presumably filled in by the callee -- confirm against the definition.
365 static void CalculatePixelDeliveryTimes(
366 unsigned int NumberOfActivePlanes,
368 double VRatioChroma[],
369 double VRatioPrefetchY[],
370 double VRatioPrefetchC[],
371 unsigned int swath_width_luma_ub[],
372 unsigned int swath_width_chroma_ub[],
373 unsigned int DPPPerPlane[],
375 double HRatioChroma[],
377 double PSCL_THROUGHPUT[],
378 double PSCL_THROUGHPUT_CHROMA[],
381 enum scan_direction_class SourceScan[],
382 unsigned int NumberOfCursors[],
383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
385 unsigned int BlockWidth256BytesY[],
386 unsigned int BlockHeight256BytesY[],
387 unsigned int BlockWidth256BytesC[],
388 unsigned int BlockHeight256BytesC[],
389 double DisplayPipeLineDeliveryTimeLuma[],
390 double DisplayPipeLineDeliveryTimeChroma[],
391 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
392 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
393 double DisplayPipeRequestDeliveryTimeLuma[],
394 double DisplayPipeRequestDeliveryTimeChroma[],
395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
397 double CursorRequestDeliveryTime[],
398 double CursorRequestDeliveryTimePrefetch[]);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// NOTE(review): the row/request geometry arrays are plain `int` here, while CalculateVMAndRowBytes()
// declares the similarly named scalars (dpte_row_height, meta_row_width, ...) through `unsigned int *`
// -- check that callers use a consistent element type.
400 static void CalculateMetaAndPTETimes(
401 int NumberOfActivePlanes,
404 int MinMetaChunkSizeBytes,
407 double VRatioChroma[],
408 double DestinationLinesToRequestRowInVBlank[],
409 double DestinationLinesToRequestRowInImmediateFlip[],
414 enum scan_direction_class SourceScan[],
415 int dpte_row_height[],
416 int dpte_row_height_chroma[],
417 int meta_row_width[],
418 int meta_row_width_chroma[],
419 int meta_row_height[],
420 int meta_row_height_chroma[],
421 int meta_req_width[],
422 int meta_req_width_chroma[],
423 int meta_req_height[],
424 int meta_req_height_chroma[],
425 int dpte_group_bytes[],
426 int PTERequestSizeY[],
427 int PTERequestSizeC[],
428 int PixelPTEReqWidthY[],
429 int PixelPTEReqHeightY[],
430 int PixelPTEReqWidthC[],
431 int PixelPTEReqHeightC[],
432 int dpte_row_width_luma_ub[],
433 int dpte_row_width_chroma_ub[],
434 double DST_Y_PER_PTE_ROW_NOM_L[],
435 double DST_Y_PER_PTE_ROW_NOM_C[],
436 double DST_Y_PER_META_ROW_NOM_L[],
437 double DST_Y_PER_META_ROW_NOM_C[],
438 double TimePerMetaChunkNominal[],
439 double TimePerChromaMetaChunkNominal[],
440 double TimePerMetaChunkVBlank[],
441 double TimePerChromaMetaChunkVBlank[],
442 double TimePerMetaChunkFlip[],
443 double TimePerChromaMetaChunkFlip[],
444 double time_per_pte_group_nom_luma[],
445 double time_per_pte_group_vblank_luma[],
446 double time_per_pte_group_flip_luma[],
447 double time_per_pte_group_nom_chroma[],
448 double time_per_pte_group_vblank_chroma[],
449 double time_per_pte_group_flip_chroma[]);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// The dpde0_bytes_per_frame_ub_* arrays are unsigned int while the meta_pte_bytes_per_frame_ub_*
// arrays are int.
// NOTE(review): the four trailing TimePer... arrays are presumably filled in by the callee -- confirm.
451 static void CalculateVMGroupAndRequestTimes(
452 unsigned int NumberOfActivePlanes,
454 unsigned int GPUVMMaxPageTableLevels,
455 unsigned int HTotal[],
457 double DestinationLinesToRequestVMInVBlank[],
458 double DestinationLinesToRequestVMInImmediateFlip[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 int vm_group_bytes[],
464 unsigned int dpde0_bytes_per_frame_ub_l[],
465 unsigned int dpde0_bytes_per_frame_ub_c[],
466 int meta_pte_bytes_per_frame_ub_l[],
467 int meta_pte_bytes_per_frame_ub_c[],
468 double TimePerVMGroupVBlank[],
469 double TimePerVMGroupFlip[],
470 double TimePerVMRequestVBlank[],
471 double TimePerVMRequestFlip[]);
// Forward declaration of a file-local helper.
// The Block{Height,Width}256Bytes{Y,C} arrays are `int` here, whereas CalculatePixelDeliveryTimes()
// declares arrays of the same names as `unsigned int`.
// NOTE(review): the seven trailing pointer parameters are presumably outputs -- confirm against the definition.
473 static void CalculateStutterEfficiency(
474 struct display_mode_lib *mode_lib,
475 int CompressedBufferSizeInkByte,
476 bool UnboundedRequestEnabled,
477 int ConfigReturnBufferSizeInKByte,
478 int MetaFIFOSizeInKEntries,
479 int ZeroSizeBufferEntries,
480 int NumberOfActivePlanes,
481 int ROBBufferSizeInKByte,
482 double TotalDataReadBandwidth,
485 double COMPBUF_RESERVED_SPACE_64B,
486 double COMPBUF_RESERVED_SPACE_ZS,
489 bool SynchronizedVBlank,
490 double Z8StutterEnterPlusExitWatermark,
491 double StutterEnterPlusExitWatermark,
492 bool ProgressiveToInterlaceUnitInOPP,
494 double MinTTUVBlank[],
496 unsigned int DETBufferSizeY[],
498 double BytePerPixelDETY[],
499 double SwathWidthY[],
502 double NetDCCRateLuma[],
503 double NetDCCRateChroma[],
504 double DCCFractionOfZeroSizeRequestsLuma[],
505 double DCCFractionOfZeroSizeRequestsChroma[],
510 enum scan_direction_class SourceScan[],
511 int BlockHeight256BytesY[],
512 int BlockWidth256BytesY[],
513 int BlockHeight256BytesC[],
514 int BlockWidth256BytesC[],
515 int DCCYMaxUncompressedBlock[],
516 int DCCCMaxUncompressedBlock[],
519 bool WritebackEnable[],
520 double ReadBandwidthPlaneLuma[],
521 double ReadBandwidthPlaneChroma[],
522 double meta_row_bw[],
523 double dpte_row_bw[],
524 double *StutterEfficiencyNotIncludingVBlank,
525 double *StutterEfficiency,
526 int *NumberOfStutterBurstsPerFrame,
527 double *Z8StutterEfficiencyNotIncludingVBlank,
528 double *Z8StutterEfficiency,
529 int *Z8NumberOfStutterBurstsPerFrame,
530 double *StutterPeriod);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// Its list overlaps CalculateSwathWidth()'s (SourceScan, SourcePixelFormat, ViewportHeight,
// SurfaceHeightY/C, Read256BytesBlock*, ODMCombineEnabled, BlendingAndTiming, swath_width_*_ub).
// NOTE(review): ViewportSizeSupport is a `bool *`, presumably the overall result -- confirm.
532 static void CalculateSwathAndDETConfiguration(
534 int NumberOfActivePlanes,
535 unsigned int DETBufferSizeInKByte,
536 double MaximumSwathWidthLuma[],
537 double MaximumSwathWidthChroma[],
538 enum scan_direction_class SourceScan[],
539 enum source_format_class SourcePixelFormat[],
540 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportHeight[],
545 int SurfaceHeightY[],
546 int SurfaceHeightC[],
547 int Read256BytesBlockHeightY[],
548 int Read256BytesBlockHeightC[],
549 int Read256BytesBlockWidthY[],
550 int Read256BytesBlockWidthC[],
551 enum odm_combine_mode ODMCombineEnabled[],
552 int BlendingAndTiming[],
555 double BytePerPixDETY[],
556 double BytePerPixDETC[],
559 double HRatioChroma[],
561 int swath_width_luma_ub[],
562 int swath_width_chroma_ub[],
564 double SwathWidthChroma[],
567 unsigned int DETBufferSizeY[],
568 unsigned int DETBufferSizeC[],
569 bool ViewportSizeSupportPerPlane[],
570 bool *ViewportSizeSupport);
// Forward declaration of a file-local helper taking per-plane arrays plus NumberOfActivePlanes.
// NOTE(review): the arrays from SwathWidthSingleDPPY onward are presumably filled in by the callee
// -- confirm against the definition.
571 static void CalculateSwathWidth(
573 int NumberOfActivePlanes,
574 enum source_format_class SourcePixelFormat[],
575 enum scan_direction_class SourceScan[],
577 int ViewportHeight[],
580 int SurfaceHeightY[],
581 int SurfaceHeightC[],
582 enum odm_combine_mode ODMCombineEnabled[],
585 int Read256BytesBlockHeightY[],
586 int Read256BytesBlockHeightC[],
587 int Read256BytesBlockWidthY[],
588 int Read256BytesBlockWidthC[],
589 int BlendingAndTiming[],
593 double SwathWidthSingleDPPY[],
594 double SwathWidthSingleDPPC[],
595 double SwathWidthY[],
596 double SwathWidthC[],
597 int MaximumSwathHeightY[],
598 int MaximumSwathHeightC[],
599 int swath_width_luma_ub[],
600 int swath_width_chroma_ub[]);
// Forward declaration of a file-local helper returning double. Apart from the leading
// RoundTripPingLatencyCycles, the parameters listed here match CalculateExtraLatencyBytes() declared next.
602 static double CalculateExtraLatency(
603 int RoundTripPingLatencyCycles,
606 int TotalNumberOfActiveDPP,
607 int PixelChunkSizeInKByte,
608 int TotalNumberOfDCCActiveDPP,
613 int NumberOfActivePlanes,
615 int dpte_group_bytes[],
616 double HostVMInefficiencyFactor,
617 double HostVMMinPageSize,
618 int HostVMMaxNonCachedPageTableLevels);
// Forward declaration of a file-local helper returning double; dpte_group_bytes is a per-plane
// array, accompanied by NumberOfActivePlanes.
620 static double CalculateExtraLatencyBytes(
622 int TotalNumberOfActiveDPP,
623 int PixelChunkSizeInKByte,
624 int TotalNumberOfDCCActiveDPP,
628 int NumberOfActivePlanes,
630 int dpte_group_bytes[],
631 double HostVMInefficiencyFactor,
632 double HostVMMinPageSize,
633 int HostVMMaxNonCachedPageTableLevels);
// Forward declaration of a file-local helper returning double; every parameter is passed by value,
// so the return value is the only result.
635 static double CalculateUrgentLatency(
636 double UrgentLatencyPixelDataOnly,
637 double UrgentLatencyPixelMixedWithVMData,
638 double UrgentLatencyVMDataOnly,
639 bool DoUrgentLatencyAdjustment,
640 double UrgentLatencyAdjustmentFabricClockComponent,
641 double UrgentLatencyAdjustmentFabricClockReference,
642 double FabricClockSingle);
// Forward declaration of a file-local helper.
// Output is taken by pointer here, whereas UnboundedRequest() declared next takes it by value.
// NOTE(review): UnboundedRequestEnabled and CompressedBufferSizeInkByte are pointers, presumably
// the two results named in the function name -- confirm against the definition.
644 static void CalculateUnboundedRequestAndCompressedBufferSize(
645 unsigned int DETBufferSizeInKByte,
646 int ConfigReturnBufferSizeInKByte,
647 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
651 int CompressedBufferSegmentSizeInkByteFinal,
652 enum output_encoder_class *Output,
653 bool *UnboundedRequestEnabled,
654 int *CompressedBufferSizeInkByte);
// Forward declaration of a file-local predicate: all inputs by value, result returned as bool.
656 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
/*
 * Non-static entry point. Calls, in order, ModeSupportAndSystemConfiguration(),
 * PixelClockAdjustmentForProgressiveToInterlaceUnit(), DisplayPipeConfiguration() and
 * DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(),
 * handing the same mode_lib to each.
 */
658 void dml31_recalculate(struct display_mode_lib *mode_lib)
660 ModeSupportAndSystemConfiguration(mode_lib);
661 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
662 DisplayPipeConfiguration(mode_lib);
// With __DML_VBA_DEBUG__ defined, log the hand-off to the final stage.
663 #ifdef __DML_VBA_DEBUG__
664 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
666 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscce delay computation from the slice geometry (sliceWidth, numSlices) and the pixel format.
 * The accepted input ranges are listed in the comments at the top of the body; a pixel count is
 * derived at the end as Delay * 3 * pixelsPerClock.
 */
669 static unsigned int dscceComputeDelay(
672 unsigned int sliceWidth,
673 unsigned int numSlices,
674 enum output_format_class pixelFormat,
675 enum output_encoder_class Output)
677 // valid bpc = source bits per component in the set of {8, 10, 12}
678 // valid bpp = increments of 1/16 of a bit
679 // min = 6/7/8 in N420/N422/444, respectively
680 // max = such that compression is 1:1
681 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
682 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
683 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
686 unsigned int rcModelSize = 8192;
688 // N422/N420 operate at 2 pixels per clock
// NOTE(review): pixelsPerClock starts at 0 and is used as a divisor further down, so the format
// checks that follow are expected to assign it on every path -- confirm.
689 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691 if (pixelFormat == dm_420)
693 else if (pixelFormat == dm_444)
695 else if (pixelFormat == dm_n422)
697 // #all other modes operate at 1 pixel per clock
701 //initial transmit delay as per PPS
// The 2.0 literal makes this a floating-point division before dml_round() is applied.
702 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
712 //divide by pixel per cycle to compute slice width as seen by DSC
// Both operands are unsigned int, so any remainder is discarded.
713 w = sliceWidth / pixelsPerClock;
715 //422 mode has an additional cycle of delay
// NOTE(review): the condition below names 420, 444 and native 422 (dm_n422) -- confirm which
// branch carries the extra cycle mentioned above.
716 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
721 //main calculation for the dscce
722 ix = initalXmitDelay + 45;
// (a + 2) / 3 is an integer ceiling of a / 3.
727 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
728 L = (ax + wx - 1) / wx;
729 if ((ix % w) == 0 && P != 0)
733 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
736 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscc delay computation selected by pixelFormat. Delay starts at 0; there is one section for
 * dm_420, one for dm_n422 and a third group of stage comments after that. Each section steps
 * through the stages named in its comments: input deserializer, input cdc fifo, cdc uncertainty,
 * output cdc fifo and output serializer.
 */
740 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 unsigned int Delay = 0;
744 if (pixelFormat == dm_420) {
749 // dscc - input deserializer
751 // dscc gets pixels every other cycle
753 // dscc - input cdc fifo
755 // dscc gets pixels every other cycle
757 // dscc - cdc uncertainty
759 // dscc - output cdc fifo
761 // dscc gets pixels every other cycle
763 // dscc - cdc uncertainty
765 // dscc - output serializer
769 } else if (pixelFormat == dm_n422) {
774 // dscc - input deserializer
776 // dscc - input cdc fifo
778 // dscc - cdc uncertainty
780 // dscc - output cdc fifo
782 // dscc - cdc uncertainty
784 // dscc - output serializer
// Third group of stages, following the dm_420 and dm_n422 sections.
793 // dscc - input deserializer
795 // dscc - input cdc fifo
797 // dscc - cdc uncertainty
799 // dscc - output cdc fifo
801 // dscc - output serializer
803 // dscc - cdc uncertainty
812 static bool CalculatePrefetchSchedule(
813 struct display_mode_lib *mode_lib,
814 double HostVMInefficiencyFactor,
816 unsigned int DSCDelay,
817 double DPPCLKDelaySubtotalPlusCNVCFormater,
818 double DPPCLKDelaySCL,
819 double DPPCLKDelaySCLLBOnly,
820 double DPPCLKDelayCNVCCursor,
821 double DISPCLKDelaySubtotal,
822 unsigned int DPP_RECOUT_WIDTH,
823 enum output_format_class OutputFormat,
824 unsigned int MaxInterDCNTileRepeaters,
825 unsigned int VStartup,
826 unsigned int MaxVStartup,
827 unsigned int GPUVMPageTableLevels,
830 unsigned int HostVMMaxNonCachedPageTableLevels,
831 double HostVMMinPageSize,
832 bool DynamicMetadataEnable,
833 bool DynamicMetadataVMEnabled,
834 int DynamicMetadataLinesBeforeActiveRequired,
835 unsigned int DynamicMetadataTransmittedBytes,
836 double UrgentLatency,
837 double UrgentExtraLatency,
839 unsigned int PDEAndMetaPTEBytesFrame,
840 unsigned int MetaRowByte,
841 unsigned int PixelPTEBytesPerRow,
842 double PrefetchSourceLinesY,
843 unsigned int SwathWidthY,
844 double VInitPreFillY,
845 unsigned int MaxNumSwathY,
846 double PrefetchSourceLinesC,
847 unsigned int SwathWidthC,
848 double VInitPreFillC,
849 unsigned int MaxNumSwathC,
850 int swath_width_luma_ub,
851 int swath_width_chroma_ub,
852 unsigned int SwathHeightY,
853 unsigned int SwathHeightC,
855 double *DSTXAfterScaler,
856 double *DSTYAfterScaler,
857 double *DestinationLinesForPrefetch,
858 double *PrefetchBandwidth,
859 double *DestinationLinesToRequestVMInVBlank,
860 double *DestinationLinesToRequestRowInVBlank,
861 double *VRatioPrefetchY,
862 double *VRatioPrefetchC,
863 double *RequiredPrefetchPixDataBWLuma,
864 double *RequiredPrefetchPixDataBWChroma,
865 bool *NotEnoughTimeForDynamicMetadata,
867 double *prefetch_vmrow_bw,
871 int *VUpdateOffsetPix,
872 double *VUpdateWidthPix,
873 double *VReadyOffsetPix)
875 bool MyError = false;
876 unsigned int DPPCycles, DISPCLKCycles;
877 double DSTTotalPixelsAfterScaler;
879 double dst_y_prefetch_equ;
880 #ifdef __DML_VBA_DEBUG__
883 double prefetch_bw_oto;
884 double prefetch_bw_pr;
887 double Tvm_oto_lines;
888 double Tr0_oto_lines;
889 double dst_y_prefetch_oto;
890 double TimeForFetchingMetaPTE = 0;
891 double TimeForFetchingRowInVBlank = 0;
892 double LinesToRequestPrefetchPixelData = 0;
893 unsigned int HostVMDynamicLevelsTrips;
897 double Tvm_trips_rounded;
898 double Tr0_trips_rounded;
901 double prefetch_bw_equ;
907 double prefetch_sw_bytes;
910 int max_vratio_pre = 4;
916 if (GPUVMEnable == true && HostVMEnable == true) {
917 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
919 HostVMDynamicLevelsTrips = 0;
921 #ifdef __DML_VBA_DEBUG__
922 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
924 CalculateVupdateAndDynamicMetadataParameters(
925 MaxInterDCNTileRepeaters,
928 myPipe->DCFCLKDeepSleep,
932 DynamicMetadataTransmittedBytes,
933 DynamicMetadataLinesBeforeActiveRequired,
934 myPipe->InterlaceEnable,
935 myPipe->ProgressiveToInterlaceUnitInOPP,
944 LineTime = myPipe->HTotal / myPipe->PixelClock;
945 trip_to_mem = UrgentLatency;
946 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
948 #ifdef __DML_VBA_ALLOW_DELTA__
949 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
951 if (DynamicMetadataVMEnabled == true) {
953 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
955 *Tdmdl = TWait + UrgentExtraLatency;
958 #ifdef __DML_VBA_ALLOW_DELTA__
959 if (DynamicMetadataEnable == false) {
964 if (DynamicMetadataEnable == true) {
965 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
966 *NotEnoughTimeForDynamicMetadata = true;
967 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
968 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
969 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
970 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
971 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
973 *NotEnoughTimeForDynamicMetadata = false;
976 *NotEnoughTimeForDynamicMetadata = false;
979 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
981 if (myPipe->ScalerEnabled)
982 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
984 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
986 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
988 DISPCLKCycles = DISPCLKDelaySubtotal;
990 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
993 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
995 #ifdef __DML_VBA_DEBUG__
996 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
997 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
998 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
999 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1000 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1001 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1002 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1003 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1006 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1008 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1009 *DSTYAfterScaler = 1;
1011 *DSTYAfterScaler = 0;
1013 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1014 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1015 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1017 #ifdef __DML_VBA_DEBUG__
1018 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1023 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1024 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1025 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1027 #ifdef __DML_VBA_ALLOW_DELTA__
1028 if (!myPipe->DCCEnable) {
1030 Tr0_trips_rounded = 0.0;
1036 Tvm_trips_rounded = 0.0;
1040 if (GPUVMPageTableLevels >= 3) {
1041 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1045 } else if (!myPipe->DCCEnable) {
1048 *Tno_bw = LineTime / 4;
1051 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1052 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1054 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1056 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1057 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1058 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1059 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1060 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1062 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1063 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1064 #ifdef __DML_VBA_DEBUG__
1065 Tsw_oto = Lsw_oto * LineTime;
1069 #ifdef __DML_VBA_DEBUG__
1070 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1071 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1072 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1073 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1074 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1075 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1078 if (GPUVMEnable == true)
1079 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1081 Tvm_oto = LineTime / 4.0;
1083 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1084 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1088 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1091 #ifdef __DML_VBA_DEBUG__
1092 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1093 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1094 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1095 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1096 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1097 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1098 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1099 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1100 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1103 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1104 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1105 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1106 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1107 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1108 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1110 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1112 if (prefetch_sw_bytes < dep_bytes)
1113 prefetch_sw_bytes = 2 * dep_bytes;
1115 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1116 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1117 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1118 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1119 dml_print("DML: LineTime: %f\n", LineTime);
1120 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1122 dml_print("DML: LineTime: %f\n", LineTime);
1123 dml_print("DML: VStartup: %d\n", VStartup);
1124 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1125 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1126 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1127 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1128 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1129 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1130 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1131 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1132 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1133 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1134 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1136 *PrefetchBandwidth = 0;
1137 *DestinationLinesToRequestVMInVBlank = 0;
1138 *DestinationLinesToRequestRowInVBlank = 0;
1139 *VRatioPrefetchY = 0;
1140 *VRatioPrefetchC = 0;
1141 *RequiredPrefetchPixDataBWLuma = 0;
1142 if (dst_y_prefetch_equ > 1) {
1143 double PrefetchBandwidth1;
1144 double PrefetchBandwidth2;
1145 double PrefetchBandwidth3;
1146 double PrefetchBandwidth4;
1148 if (Tpre_rounded - *Tno_bw > 0) {
1149 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1150 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1151 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1153 PrefetchBandwidth1 = 0;
1156 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1157 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1158 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1161 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1162 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1164 PrefetchBandwidth2 = 0;
1166 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1167 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1168 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1169 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1171 PrefetchBandwidth3 = 0;
1174 #ifdef __DML_VBA_DEBUG__
1175 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1176 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1177 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1179 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1180 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1181 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1184 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1185 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1187 PrefetchBandwidth4 = 0;
1194 if (PrefetchBandwidth1 > 0) {
1195 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1196 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1205 if (PrefetchBandwidth2 > 0) {
1206 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1207 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1216 if (PrefetchBandwidth3 > 0) {
1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1228 prefetch_bw_equ = PrefetchBandwidth1;
1229 } else if (Case2OK) {
1230 prefetch_bw_equ = PrefetchBandwidth2;
1231 } else if (Case3OK) {
1232 prefetch_bw_equ = PrefetchBandwidth3;
1234 prefetch_bw_equ = PrefetchBandwidth4;
1237 #ifdef __DML_VBA_DEBUG__
1238 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1239 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1240 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1241 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1244 if (prefetch_bw_equ > 0) {
1245 if (GPUVMEnable == true) {
1246 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1248 Tvm_equ = LineTime / 4;
1251 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1253 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1255 (LineTime - Tvm_equ) / 2,
1258 Tr0_equ = (LineTime - Tvm_equ) / 2;
1263 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1267 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1268 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1269 TimeForFetchingMetaPTE = Tvm_oto;
1270 TimeForFetchingRowInVBlank = Tr0_oto;
1271 *PrefetchBandwidth = prefetch_bw_oto;
1273 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1274 TimeForFetchingMetaPTE = Tvm_equ;
1275 TimeForFetchingRowInVBlank = Tr0_equ;
1276 *PrefetchBandwidth = prefetch_bw_equ;
1279 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1281 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1283 #ifdef __DML_VBA_ALLOW_DELTA__
1284 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1285 // See note above dated 5/30/2018
1286 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1287 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1289 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1292 #ifdef __DML_VBA_DEBUG__
1293 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1294 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1295 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1296 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1297 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1298 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1299 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1302 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1304 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1305 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1306 #ifdef __DML_VBA_DEBUG__
1307 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1308 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1309 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1311 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1312 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1313 *VRatioPrefetchY = dml_max(
1314 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1315 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1316 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1319 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1320 *VRatioPrefetchY = 0;
1322 #ifdef __DML_VBA_DEBUG__
1323 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1324 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1325 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1329 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1330 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1332 #ifdef __DML_VBA_DEBUG__
1333 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1334 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1335 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1337 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1338 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1339 *VRatioPrefetchC = dml_max(
1341 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1342 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1345 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1346 *VRatioPrefetchC = 0;
1348 #ifdef __DML_VBA_DEBUG__
1349 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1350 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1351 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1355 #ifdef __DML_VBA_DEBUG__
1356 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1357 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1358 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1361 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1363 #ifdef __DML_VBA_DEBUG__
1364 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1367 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1371 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1372 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1373 *VRatioPrefetchY = 0;
1374 *VRatioPrefetchC = 0;
1375 *RequiredPrefetchPixDataBWLuma = 0;
1376 *RequiredPrefetchPixDataBWChroma = 0;
1380 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1381 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1382 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1383 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1385 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1386 (double) LinesToRequestPrefetchPixelData * LineTime);
1387 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1388 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1389 (double) myPipe->HTotal)) * LineTime);
1390 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1391 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1392 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1393 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1394 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1398 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1402 double prefetch_vm_bw;
1403 double prefetch_row_bw;
1405 if (PDEAndMetaPTEBytesFrame == 0) {
1407 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1408 #ifdef __DML_VBA_DEBUG__
1409 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1410 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1411 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1412 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1414 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1415 #ifdef __DML_VBA_DEBUG__
1416 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1421 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1424 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1425 prefetch_row_bw = 0;
1426 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1427 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1429 #ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1431 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1432 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1433 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1436 prefetch_row_bw = 0;
1438 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1441 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1445 *PrefetchBandwidth = 0;
1446 TimeForFetchingMetaPTE = 0;
1447 TimeForFetchingRowInVBlank = 0;
1448 *DestinationLinesToRequestVMInVBlank = 0;
1449 *DestinationLinesToRequestRowInVBlank = 0;
1450 *DestinationLinesForPrefetch = 0;
1451 LinesToRequestPrefetchPixelData = 0;
1452 *VRatioPrefetchY = 0;
1453 *VRatioPrefetchC = 0;
1454 *RequiredPrefetchPixDataBWLuma = 0;
1455 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap Clock to a VCOSpeed * 4 / N step, rounding up.
 *
 * The divider N is obtained by passing the exact ratio VCOSpeed * 4 / Clock
 * through dml_floor(..., 1).  A divider that is not larger than the exact
 * ratio yields a result that is not smaller than Clock.
 *
 * NOTE(review): dml_floor(x, 1) is presumed to round x down to a whole
 * number; confirm against dml_inline_defs.h.  If Clock exceeds VCOSpeed * 4
 * the divider becomes 0 and the quotient is not finite; callers are presumed
 * to keep Clock within range.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}
/*
 * RoundToDFSGranularityDown - snap Clock to a VCOSpeed * 4 / N step, rounding
 * down.
 *
 * The divider N is obtained by passing the exact ratio VCOSpeed * 4 / Clock
 * through dml_ceil(..., 1).  A divider that is not smaller than the exact
 * ratio yields a result that is not larger than Clock.
 *
 * NOTE(review): dml_ceil(x, 1) is presumed to round x up to a whole number;
 * confirm against dml_inline_defs.h.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
}
1471 static void CalculateDCCConfiguration(
1473 bool DCCProgrammingAssumesScanDirectionUnknown,
1474 enum source_format_class SourcePixelFormat,
1475 unsigned int SurfaceWidthLuma,
1476 unsigned int SurfaceWidthChroma,
1477 unsigned int SurfaceHeightLuma,
1478 unsigned int SurfaceHeightChroma,
1479 double DETBufferSize,
1480 unsigned int RequestHeight256ByteLuma,
1481 unsigned int RequestHeight256ByteChroma,
1482 enum dm_swizzle_mode TilingFormat,
1483 unsigned int BytePerPixelY,
1484 unsigned int BytePerPixelC,
1485 double BytePerPixelDETY,
1486 double BytePerPixelDETC,
1487 enum scan_direction_class ScanOrientation,
1488 unsigned int *MaxUncompressedBlockLuma,
1489 unsigned int *MaxUncompressedBlockChroma,
1490 unsigned int *MaxCompressedBlockLuma,
1491 unsigned int *MaxCompressedBlockChroma,
1492 unsigned int *IndependentBlockLuma,
1493 unsigned int *IndependentBlockChroma)
1502 double detile_buf_vp_horz_limit;
1503 double detile_buf_vp_vert_limit;
1505 int MAS_vp_horz_limit;
1506 int MAS_vp_vert_limit;
1507 int max_vp_horz_width;
1508 int max_vp_vert_height;
1509 int eff_surf_width_l;
1510 int eff_surf_width_c;
1511 int eff_surf_height_l;
1512 int eff_surf_height_c;
1514 int full_swath_bytes_horz_wc_l;
1515 int full_swath_bytes_horz_wc_c;
1516 int full_swath_bytes_vert_wc_l;
1517 int full_swath_bytes_vert_wc_c;
1518 int req128_horz_wc_l;
1519 int req128_horz_wc_c;
1520 int req128_vert_wc_l;
1521 int req128_vert_wc_c;
1522 int segment_order_horz_contiguous_luma;
1523 int segment_order_horz_contiguous_chroma;
1524 int segment_order_vert_contiguous_luma;
1525 int segment_order_vert_contiguous_chroma;
1528 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1530 RequestType RequestLuma;
1531 RequestType RequestChroma;
1533 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1539 if (BytePerPixelY == 1)
1541 if (BytePerPixelC == 1)
1543 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1545 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1548 if (BytePerPixelC == 0) {
1549 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1550 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1551 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1553 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1554 detile_buf_vp_horz_limit = (double) swath_buf_size
1555 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1556 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1557 detile_buf_vp_vert_limit = (double) swath_buf_size
1558 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1561 if (SourcePixelFormat == dm_420_10) {
1562 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1563 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1566 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1567 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1569 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1570 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1571 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1572 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1573 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1574 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1575 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1576 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1578 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1579 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1580 if (BytePerPixelC > 0) {
1581 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1582 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1584 full_swath_bytes_horz_wc_c = 0;
1585 full_swath_bytes_vert_wc_c = 0;
1588 if (SourcePixelFormat == dm_420_10) {
1589 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1590 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1591 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1592 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1595 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1596 req128_horz_wc_l = 0;
1597 req128_horz_wc_c = 0;
1598 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1599 req128_horz_wc_l = 0;
1600 req128_horz_wc_c = 1;
1601 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1602 req128_horz_wc_l = 1;
1603 req128_horz_wc_c = 0;
1605 req128_horz_wc_l = 1;
1606 req128_horz_wc_c = 1;
1609 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1610 req128_vert_wc_l = 0;
1611 req128_vert_wc_c = 0;
1612 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1613 req128_vert_wc_l = 0;
1614 req128_vert_wc_c = 1;
1615 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1616 req128_vert_wc_l = 1;
1617 req128_vert_wc_c = 0;
1619 req128_vert_wc_l = 1;
1620 req128_vert_wc_c = 1;
1623 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1624 segment_order_horz_contiguous_luma = 0;
1626 segment_order_horz_contiguous_luma = 1;
1628 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1629 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1630 segment_order_vert_contiguous_luma = 0;
1632 segment_order_vert_contiguous_luma = 1;
1634 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1635 segment_order_horz_contiguous_chroma = 0;
1637 segment_order_horz_contiguous_chroma = 1;
1639 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1640 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1641 segment_order_vert_contiguous_chroma = 0;
1643 segment_order_vert_contiguous_chroma = 1;
1646 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1647 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1648 RequestLuma = REQ_256Bytes;
1649 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1650 RequestLuma = REQ_128BytesNonContiguous;
1652 RequestLuma = REQ_128BytesContiguous;
1654 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1655 RequestChroma = REQ_256Bytes;
1656 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1657 RequestChroma = REQ_128BytesNonContiguous;
1659 RequestChroma = REQ_128BytesContiguous;
1661 } else if (ScanOrientation != dm_vert) {
1662 if (req128_horz_wc_l == 0) {
1663 RequestLuma = REQ_256Bytes;
1664 } else if (segment_order_horz_contiguous_luma == 0) {
1665 RequestLuma = REQ_128BytesNonContiguous;
1667 RequestLuma = REQ_128BytesContiguous;
1669 if (req128_horz_wc_c == 0) {
1670 RequestChroma = REQ_256Bytes;
1671 } else if (segment_order_horz_contiguous_chroma == 0) {
1672 RequestChroma = REQ_128BytesNonContiguous;
1674 RequestChroma = REQ_128BytesContiguous;
1677 if (req128_vert_wc_l == 0) {
1678 RequestLuma = REQ_256Bytes;
1679 } else if (segment_order_vert_contiguous_luma == 0) {
1680 RequestLuma = REQ_128BytesNonContiguous;
1682 RequestLuma = REQ_128BytesContiguous;
1684 if (req128_vert_wc_c == 0) {
1685 RequestChroma = REQ_256Bytes;
1686 } else if (segment_order_vert_contiguous_chroma == 0) {
1687 RequestChroma = REQ_128BytesNonContiguous;
1689 RequestChroma = REQ_128BytesContiguous;
1693 if (RequestLuma == REQ_256Bytes) {
1694 *MaxUncompressedBlockLuma = 256;
1695 *MaxCompressedBlockLuma = 256;
1696 *IndependentBlockLuma = 0;
1697 } else if (RequestLuma == REQ_128BytesContiguous) {
1698 *MaxUncompressedBlockLuma = 256;
1699 *MaxCompressedBlockLuma = 128;
1700 *IndependentBlockLuma = 128;
1702 *MaxUncompressedBlockLuma = 256;
1703 *MaxCompressedBlockLuma = 64;
1704 *IndependentBlockLuma = 64;
1707 if (RequestChroma == REQ_256Bytes) {
1708 *MaxUncompressedBlockChroma = 256;
1709 *MaxCompressedBlockChroma = 256;
1710 *IndependentBlockChroma = 0;
1711 } else if (RequestChroma == REQ_128BytesContiguous) {
1712 *MaxUncompressedBlockChroma = 256;
1713 *MaxCompressedBlockChroma = 128;
1714 *IndependentBlockChroma = 128;
1716 *MaxUncompressedBlockChroma = 256;
1717 *MaxCompressedBlockChroma = 64;
1718 *IndependentBlockChroma = 64;
1721 if (DCCEnabled != true || BytePerPixelC == 0) {
1722 *MaxUncompressedBlockChroma = 0;
1723 *MaxCompressedBlockChroma = 0;
1724 *IndependentBlockChroma = 0;
1727 if (DCCEnabled != true) {
1728 *MaxUncompressedBlockLuma = 0;
1729 *MaxCompressedBlockLuma = 0;
1730 *IndependentBlockLuma = 0;
1734 static double CalculatePrefetchSourceLines(
1735 struct display_mode_lib *mode_lib,
1739 bool ProgressiveToInterlaceUnitInOPP,
1740 unsigned int SwathHeight,
1741 unsigned int ViewportYStart,
1742 double *VInitPreFill,
1743 unsigned int *MaxNumSwath)
1745 struct vba_vars_st *v = &mode_lib->vba;
1746 unsigned int MaxPartialSwath;
1748 if (ProgressiveToInterlaceUnitInOPP)
1749 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1751 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1753 if (!v->IgnoreViewportPositioning) {
1755 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1757 if (*VInitPreFill > 1.0)
1758 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1760 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1761 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1765 if (ViewportYStart != 0)
1766 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1768 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1770 if (*VInitPreFill > 1.0)
1771 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1773 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1776 #ifdef __DML_VBA_DEBUG__
1777 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1778 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1779 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1780 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1781 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1782 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1783 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1784 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1785 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1787 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1790 static unsigned int CalculateVMAndRowBytes(
1791 struct display_mode_lib *mode_lib,
1793 unsigned int BlockHeight256Bytes,
1794 unsigned int BlockWidth256Bytes,
1795 enum source_format_class SourcePixelFormat,
1796 unsigned int SurfaceTiling,
1797 unsigned int BytePerPixel,
1798 enum scan_direction_class ScanDirection,
1799 unsigned int SwathWidth,
1800 unsigned int ViewportHeight,
1803 unsigned int HostVMMaxNonCachedPageTableLevels,
1804 unsigned int GPUVMMinPageSize,
1805 unsigned int HostVMMinPageSize,
1806 unsigned int PTEBufferSizeInRequests,
1808 unsigned int DCCMetaPitch,
1809 unsigned int *MacroTileWidth,
1810 unsigned int *MetaRowByte,
1811 unsigned int *PixelPTEBytesPerRow,
1812 bool *PTEBufferSizeNotExceeded,
1813 int *dpte_row_width_ub,
1814 unsigned int *dpte_row_height,
1815 unsigned int *MetaRequestWidth,
1816 unsigned int *MetaRequestHeight,
1817 unsigned int *meta_row_width,
1818 unsigned int *meta_row_height,
1819 int *vm_group_bytes,
1820 unsigned int *dpte_group_bytes,
1821 unsigned int *PixelPTEReqWidth,
1822 unsigned int *PixelPTEReqHeight,
1823 unsigned int *PTERequestSize,
1824 int *DPDE0BytesFrame,
1825 int *MetaPTEBytesFrame)
1827 struct vba_vars_st *v = &mode_lib->vba;
1828 unsigned int MPDEBytesFrame;
1829 unsigned int DCCMetaSurfaceBytes;
1830 unsigned int MacroTileSizeBytes;
1831 unsigned int MacroTileHeight;
1832 unsigned int ExtraDPDEBytesFrame;
1833 unsigned int PDEAndMetaPTEBytesFrame;
1834 unsigned int PixelPTEReqHeightPTEs = 0;
1835 unsigned int HostVMDynamicLevels = 0;
1836 double FractionOfPTEReturnDrop;
1838 if (GPUVMEnable == true && HostVMEnable == true) {
1839 if (HostVMMinPageSize < 2048) {
1840 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1841 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1842 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1844 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1848 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1849 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1850 if (ScanDirection != dm_vert) {
1851 *meta_row_height = *MetaRequestHeight;
1852 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1853 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1855 *meta_row_height = *MetaRequestWidth;
1856 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1857 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1859 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1860 if (GPUVMEnable == true) {
1861 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1862 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1864 *MetaPTEBytesFrame = 0;
1868 if (DCCEnable != true) {
1869 *MetaPTEBytesFrame = 0;
1874 if (SurfaceTiling == dm_sw_linear) {
1875 MacroTileSizeBytes = 256;
1876 MacroTileHeight = BlockHeight256Bytes;
1878 MacroTileSizeBytes = 65536;
1879 MacroTileHeight = 16 * BlockHeight256Bytes;
1881 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1883 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1884 if (ScanDirection != dm_vert) {
1885 *DPDE0BytesFrame = 64
1887 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1891 *DPDE0BytesFrame = 64
1893 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1897 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1899 *DPDE0BytesFrame = 0;
1900 ExtraDPDEBytesFrame = 0;
1903 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1905 #ifdef __DML_VBA_DEBUG__
1906 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1907 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1908 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1909 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1910 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1913 if (HostVMEnable == true) {
1914 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1916 #ifdef __DML_VBA_DEBUG__
1917 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1920 if (SurfaceTiling == dm_sw_linear) {
1921 PixelPTEReqHeightPTEs = 1;
1922 *PixelPTEReqHeight = 1;
1923 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1924 *PTERequestSize = 64;
1925 FractionOfPTEReturnDrop = 0;
1926 } else if (MacroTileSizeBytes == 4096) {
1927 PixelPTEReqHeightPTEs = 1;
1928 *PixelPTEReqHeight = MacroTileHeight;
1929 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1930 *PTERequestSize = 64;
1931 if (ScanDirection != dm_vert)
1932 FractionOfPTEReturnDrop = 0;
1934 FractionOfPTEReturnDrop = 7 / 8;
1935 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1936 PixelPTEReqHeightPTEs = 16;
1937 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1938 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1939 *PTERequestSize = 128;
1940 FractionOfPTEReturnDrop = 0;
1942 PixelPTEReqHeightPTEs = 1;
1943 *PixelPTEReqHeight = MacroTileHeight;
1944 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1945 *PTERequestSize = 64;
1946 FractionOfPTEReturnDrop = 0;
1949 if (SurfaceTiling == dm_sw_linear) {
1950 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1951 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1952 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1953 } else if (ScanDirection != dm_vert) {
1954 *dpte_row_height = *PixelPTEReqHeight;
1955 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1956 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1958 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1959 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1960 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1963 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1964 *PTEBufferSizeNotExceeded = true;
1966 *PTEBufferSizeNotExceeded = false;
1969 if (GPUVMEnable != true) {
1970 *PixelPTEBytesPerRow = 0;
1971 *PTEBufferSizeNotExceeded = true;
1974 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1976 if (HostVMEnable == true) {
1977 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1980 if (HostVMEnable == true) {
1981 *vm_group_bytes = 512;
1982 *dpte_group_bytes = 512;
1983 } else if (GPUVMEnable == true) {
1984 *vm_group_bytes = 2048;
1985 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1986 *dpte_group_bytes = 512;
1988 *dpte_group_bytes = 2048;
1991 *vm_group_bytes = 0;
1992 *dpte_group_bytes = 0;
1994 return PDEAndMetaPTEBytesFrame;
1997 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1999 struct vba_vars_st *v = &mode_lib->vba;
2001 double HostVMInefficiencyFactor = 1.0;
2002 bool NoChromaPlanes = true;
2004 double VMDataOnlyReturnBW;
2005 double MaxTotalRDBandwidth = 0;
2006 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2008 v->WritebackDISPCLK = 0.0;
2009 v->DISPCLKWithRamping = 0;
2010 v->DISPCLKWithoutRamping = 0;
2011 v->GlobalDPPCLK = 0.0;
2012 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2014 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2015 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2016 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2017 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2018 if (v->HostVMEnable != true) {
2019 v->ReturnBW = dml_min(
2020 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2021 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2023 v->ReturnBW = dml_min(
2024 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2025 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2028 /* End DAL custom code */
2030 // DISPCLK and DPPCLK Calculation
2032 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2033 if (v->WritebackEnable[k]) {
2034 v->WritebackDISPCLK = dml_max(
2035 v->WritebackDISPCLK,
2036 dml31_CalculateWriteBackDISPCLK(
2037 v->WritebackPixelFormat[k],
2039 v->WritebackHRatio[k],
2040 v->WritebackVRatio[k],
2041 v->WritebackHTaps[k],
2042 v->WritebackVTaps[k],
2043 v->WritebackSourceWidth[k],
2044 v->WritebackDestinationWidth[k],
2046 v->WritebackLineBufferSize));
2050 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2051 if (v->HRatio[k] > 1) {
2052 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2053 v->MaxDCHUBToPSCLThroughput,
2054 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2056 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2059 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2061 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2062 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2064 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2065 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2068 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2069 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2070 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2071 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2073 if (v->HRatioChroma[k] > 1) {
2074 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2075 v->MaxDCHUBToPSCLThroughput,
2076 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2078 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2080 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2082 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2083 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2086 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2087 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2090 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2094 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2095 if (v->BlendingAndTiming[k] != k)
2097 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2098 v->DISPCLKWithRamping = dml_max(
2099 v->DISPCLKWithRamping,
2100 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2101 * (1 + v->DISPCLKRampingMargin / 100));
2102 v->DISPCLKWithoutRamping = dml_max(
2103 v->DISPCLKWithoutRamping,
2104 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2105 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2106 v->DISPCLKWithRamping = dml_max(
2107 v->DISPCLKWithRamping,
2108 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2109 * (1 + v->DISPCLKRampingMargin / 100));
2110 v->DISPCLKWithoutRamping = dml_max(
2111 v->DISPCLKWithoutRamping,
2112 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2114 v->DISPCLKWithRamping = dml_max(
2115 v->DISPCLKWithRamping,
2116 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2117 v->DISPCLKWithoutRamping = dml_max(
2118 v->DISPCLKWithoutRamping,
2119 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2123 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2124 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2126 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2127 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2128 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2129 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2130 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2131 v->DISPCLKDPPCLKVCOSpeed);
2132 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2133 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2134 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2135 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2137 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2139 v->DISPCLK = v->DISPCLK_calculated;
2140 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2142 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2143 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2144 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2146 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2147 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2148 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2149 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2152 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2153 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2156 // Urgent and B P-State/DRAM Clock Change Watermark
2157 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2158 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2160 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2161 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2162 v->SourcePixelFormat[k],
2163 v->SurfaceTiling[k],
2164 &v->BytePerPixelY[k],
2165 &v->BytePerPixelC[k],
2166 &v->BytePerPixelDETY[k],
2167 &v->BytePerPixelDETC[k],
2168 &v->BlockHeight256BytesY[k],
2169 &v->BlockHeight256BytesC[k],
2170 &v->BlockWidth256BytesY[k],
2171 &v->BlockWidth256BytesC[k]);
2174 CalculateSwathWidth(
2176 v->NumberOfActivePlanes,
2177 v->SourcePixelFormat,
2185 v->ODMCombineEnabled,
2188 v->BlockHeight256BytesY,
2189 v->BlockHeight256BytesC,
2190 v->BlockWidth256BytesY,
2191 v->BlockWidth256BytesC,
2192 v->BlendingAndTiming,
2196 v->SwathWidthSingleDPPY,
2197 v->SwathWidthSingleDPPC,
2202 v->swath_width_luma_ub,
2203 v->swath_width_chroma_ub);
2205 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2206 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2208 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2209 * v->VRatioChroma[k];
2210 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2213 // DCFCLK Deep Sleep
2214 CalculateDCFCLKDeepSleep(
2216 v->NumberOfActivePlanes,
2227 v->PSCL_THROUGHPUT_LUMA,
2228 v->PSCL_THROUGHPUT_CHROMA,
2230 v->ReadBandwidthPlaneLuma,
2231 v->ReadBandwidthPlaneChroma,
2233 &v->DCFCLKDeepSleep);
2236 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2237 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2238 v->DSCCLK_calculated[k] = 0.0;
2240 if (v->OutputFormat[k] == dm_420)
2241 v->DSCFormatFactor = 2;
2242 else if (v->OutputFormat[k] == dm_444)
2243 v->DSCFormatFactor = 1;
2244 else if (v->OutputFormat[k] == dm_n422)
2245 v->DSCFormatFactor = 2;
2247 v->DSCFormatFactor = 1;
2248 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2249 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2250 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2251 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2252 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2253 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2255 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2256 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2262 double BPP = v->OutputBpp[k];
2264 if (v->DSCEnabled[k] && BPP != 0) {
2265 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2266 v->DSCDelay[k] = dscceComputeDelay(
2267 v->DSCInputBitPerComponent[k],
2269 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2270 v->NumberOfDSCSlices[k],
2272 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2273 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2275 * (dscceComputeDelay(
2276 v->DSCInputBitPerComponent[k],
2278 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2279 v->NumberOfDSCSlices[k] / 2.0,
2281 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2284 * (dscceComputeDelay(
2285 v->DSCInputBitPerComponent[k],
2287 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2288 v->NumberOfDSCSlices[k] / 4.0,
2290 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2292 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2298 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2299 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2300 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2301 v->DSCDelay[k] = v->DSCDelay[j];
2304 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2305 unsigned int PDEAndMetaPTEBytesFrameY;
2306 unsigned int PixelPTEBytesPerRowY;
2307 unsigned int MetaRowByteY;
2308 unsigned int MetaRowByteC;
2309 unsigned int PDEAndMetaPTEBytesFrameC;
2310 unsigned int PixelPTEBytesPerRowC;
2311 bool PTEBufferSizeNotExceededY;
2312 bool PTEBufferSizeNotExceededC;
2314 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2315 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2316 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2317 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2318 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2320 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2321 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2324 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2327 v->BlockHeight256BytesC[k],
2328 v->BlockWidth256BytesC[k],
2329 v->SourcePixelFormat[k],
2330 v->SurfaceTiling[k],
2331 v->BytePerPixelC[k],
2334 v->ViewportHeightChroma[k],
2337 v->HostVMMaxNonCachedPageTableLevels,
2338 v->GPUVMMinPageSize,
2339 v->HostVMMinPageSize,
2340 v->PTEBufferSizeInRequestsForChroma,
2342 v->DCCMetaPitchC[k],
2343 &v->MacroTileWidthC[k],
2345 &PixelPTEBytesPerRowC,
2346 &PTEBufferSizeNotExceededC,
2347 &v->dpte_row_width_chroma_ub[k],
2348 &v->dpte_row_height_chroma[k],
2349 &v->meta_req_width_chroma[k],
2350 &v->meta_req_height_chroma[k],
2351 &v->meta_row_width_chroma[k],
2352 &v->meta_row_height_chroma[k],
2355 &v->PixelPTEReqWidthC[k],
2356 &v->PixelPTEReqHeightC[k],
2357 &v->PTERequestSizeC[k],
2358 &v->dpde0_bytes_per_frame_ub_c[k],
2359 &v->meta_pte_bytes_per_frame_ub_c[k]);
2361 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2366 v->ProgressiveToInterlaceUnitInOPP,
2368 v->ViewportYStartC[k],
2369 &v->VInitPreFillC[k],
2370 &v->MaxNumSwathC[k]);
2372 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2373 v->PTEBufferSizeInRequestsForChroma = 0;
2374 PixelPTEBytesPerRowC = 0;
2375 PDEAndMetaPTEBytesFrameC = 0;
2377 v->MaxNumSwathC[k] = 0;
2378 v->PrefetchSourceLinesC[k] = 0;
2381 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2384 v->BlockHeight256BytesY[k],
2385 v->BlockWidth256BytesY[k],
2386 v->SourcePixelFormat[k],
2387 v->SurfaceTiling[k],
2388 v->BytePerPixelY[k],
2391 v->ViewportHeight[k],
2394 v->HostVMMaxNonCachedPageTableLevels,
2395 v->GPUVMMinPageSize,
2396 v->HostVMMinPageSize,
2397 v->PTEBufferSizeInRequestsForLuma,
2399 v->DCCMetaPitchY[k],
2400 &v->MacroTileWidthY[k],
2402 &PixelPTEBytesPerRowY,
2403 &PTEBufferSizeNotExceededY,
2404 &v->dpte_row_width_luma_ub[k],
2405 &v->dpte_row_height[k],
2406 &v->meta_req_width[k],
2407 &v->meta_req_height[k],
2408 &v->meta_row_width[k],
2409 &v->meta_row_height[k],
2410 &v->vm_group_bytes[k],
2411 &v->dpte_group_bytes[k],
2412 &v->PixelPTEReqWidthY[k],
2413 &v->PixelPTEReqHeightY[k],
2414 &v->PTERequestSizeY[k],
2415 &v->dpde0_bytes_per_frame_ub_l[k],
2416 &v->meta_pte_bytes_per_frame_ub_l[k]);
2418 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2423 v->ProgressiveToInterlaceUnitInOPP,
2425 v->ViewportYStartY[k],
2426 &v->VInitPreFillY[k],
2427 &v->MaxNumSwathY[k]);
2428 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2429 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2430 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2432 CalculateRowBandwidth(
2434 v->SourcePixelFormat[k],
2438 v->HTotal[k] / v->PixelClock[k],
2441 v->meta_row_height[k],
2442 v->meta_row_height_chroma[k],
2443 PixelPTEBytesPerRowY,
2444 PixelPTEBytesPerRowC,
2445 v->dpte_row_height[k],
2446 v->dpte_row_height_chroma[k],
2448 &v->dpte_row_bw[k]);
2451 v->TotalDCCActiveDPP = 0;
2452 v->TotalActiveDPP = 0;
2453 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2454 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2455 if (v->DCCEnable[k])
2456 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2457 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2458 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2459 NoChromaPlanes = false;
2462 ReorderBytes = v->NumberOfChannels
2464 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2465 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2466 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2468 VMDataOnlyReturnBW = dml_min(
2469 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2470 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2471 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2472 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2474 #ifdef __DML_VBA_DEBUG__
2475 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2476 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2477 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2478 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2479 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2480 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2481 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2482 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2483 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2484 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2485 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2488 if (v->GPUVMEnable && v->HostVMEnable)
2489 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2491 v->UrgentExtraLatency = CalculateExtraLatency(
2492 v->RoundTripPingLatencyCycles,
2496 v->PixelChunkSizeInKByte,
2497 v->TotalDCCActiveDPP,
2502 v->NumberOfActivePlanes,
2504 v->dpte_group_bytes,
2505 HostVMInefficiencyFactor,
2506 v->HostVMMinPageSize,
2507 v->HostVMMaxNonCachedPageTableLevels);
2509 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2511 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2512 if (v->BlendingAndTiming[k] == k) {
2513 if (v->WritebackEnable[k] == true) {
2514 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2515 + CalculateWriteBackDelay(
2516 v->WritebackPixelFormat[k],
2517 v->WritebackHRatio[k],
2518 v->WritebackVRatio[k],
2519 v->WritebackVTaps[k],
2520 v->WritebackDestinationWidth[k],
2521 v->WritebackDestinationHeight[k],
2522 v->WritebackSourceHeight[k],
2523 v->HTotal[k]) / v->DISPCLK;
2525 v->WritebackDelay[v->VoltageLevel][k] = 0;
2526 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2527 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2528 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2529 v->WritebackDelay[v->VoltageLevel][k],
2531 + CalculateWriteBackDelay(
2532 v->WritebackPixelFormat[j],
2533 v->WritebackHRatio[j],
2534 v->WritebackVRatio[j],
2535 v->WritebackVTaps[j],
2536 v->WritebackDestinationWidth[j],
2537 v->WritebackDestinationHeight[j],
2538 v->WritebackSourceHeight[j],
2539 v->HTotal[k]) / v->DISPCLK);
2545 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2546 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2547 if (v->BlendingAndTiming[k] == j)
2548 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2550 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2551 v->MaxVStartupLines[k] =
2552 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2553 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2554 v->VTotal[k] - v->VActive[k]
2558 (double) v->WritebackDelay[v->VoltageLevel][k]
2559 / (v->HTotal[k] / v->PixelClock[k]),
2561 if (v->MaxVStartupLines[k] > 1023)
2562 v->MaxVStartupLines[k] = 1023;
2564 #ifdef __DML_VBA_DEBUG__
2565 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2566 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2567 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2571 v->MaximumMaxVStartupLines = 0;
2572 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2573 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2576 // We don't really care to iterate between the various prefetch modes
2577 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2579 v->UrgentLatency = CalculateUrgentLatency(
2580 v->UrgentLatencyPixelDataOnly,
2581 v->UrgentLatencyPixelMixedWithVMData,
2582 v->UrgentLatencyVMDataOnly,
2583 v->DoUrgentLatencyAdjustment,
2584 v->UrgentLatencyAdjustmentFabricClockComponent,
2585 v->UrgentLatencyAdjustmentFabricClockReference,
2588 v->FractionOfUrgentBandwidth = 0.0;
2589 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2591 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2594 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2595 bool DestinationLineTimesForPrefetchLessThan2 = false;
2596 bool VRatioPrefetchMoreThan4 = false;
2597 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2598 MaxTotalRDBandwidth = 0;
2600 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2602 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2605 myPipe.DPPCLK = v->DPPCLK[k];
2606 myPipe.DISPCLK = v->DISPCLK;
2607 myPipe.PixelClock = v->PixelClock[k];
2608 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2609 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2610 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2611 myPipe.VRatio = v->VRatio[k];
2612 myPipe.VRatioChroma = v->VRatioChroma[k];
2613 myPipe.SourceScan = v->SourceScan[k];
2614 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2615 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2616 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2617 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2618 myPipe.InterlaceEnable = v->Interlace[k];
2619 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2620 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2621 myPipe.HTotal = v->HTotal[k];
2622 myPipe.DCCEnable = v->DCCEnable[k];
2623 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2624 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2625 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2626 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2627 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2628 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2629 v->ErrorResult[k] = CalculatePrefetchSchedule(
2631 HostVMInefficiencyFactor,
2634 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2636 v->DPPCLKDelaySCLLBOnly,
2637 v->DPPCLKDelayCNVCCursor,
2638 v->DISPCLKDelaySubtotal,
2639 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2641 v->MaxInterDCNTileRepeaters,
2642 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2643 v->MaxVStartupLines[k],
2644 v->GPUVMMaxPageTableLevels,
2647 v->HostVMMaxNonCachedPageTableLevels,
2648 v->HostVMMinPageSize,
2649 v->DynamicMetadataEnable[k],
2650 v->DynamicMetadataVMEnabled,
2651 v->DynamicMetadataLinesBeforeActiveRequired[k],
2652 v->DynamicMetadataTransmittedBytes[k],
2654 v->UrgentExtraLatency,
2656 v->PDEAndMetaPTEBytesFrame[k],
2658 v->PixelPTEBytesPerRow[k],
2659 v->PrefetchSourceLinesY[k],
2661 v->VInitPreFillY[k],
2663 v->PrefetchSourceLinesC[k],
2665 v->VInitPreFillC[k],
2667 v->swath_width_luma_ub[k],
2668 v->swath_width_chroma_ub[k],
2672 &v->DSTXAfterScaler[k],
2673 &v->DSTYAfterScaler[k],
2674 &v->DestinationLinesForPrefetch[k],
2675 &v->PrefetchBandwidth[k],
2676 &v->DestinationLinesToRequestVMInVBlank[k],
2677 &v->DestinationLinesToRequestRowInVBlank[k],
2678 &v->VRatioPrefetchY[k],
2679 &v->VRatioPrefetchC[k],
2680 &v->RequiredPrefetchPixDataBWLuma[k],
2681 &v->RequiredPrefetchPixDataBWChroma[k],
2682 &v->NotEnoughTimeForDynamicMetadata[k],
2684 &v->prefetch_vmrow_bw[k],
2688 &v->VUpdateOffsetPix[k],
2689 &v->VUpdateWidthPix[k],
2690 &v->VReadyOffsetPix[k]);
2692 #ifdef __DML_VBA_DEBUG__
2693 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2695 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2698 v->NoEnoughUrgentLatencyHiding = false;
2699 v->NoEnoughUrgentLatencyHidingPre = false;
2701 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2702 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2703 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2704 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2705 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2707 CalculateUrgentBurstFactor(
2708 v->swath_width_luma_ub[k],
2709 v->swath_width_chroma_ub[k],
2712 v->HTotal[k] / v->PixelClock[k],
2714 v->CursorBufferSize,
2715 v->CursorWidth[k][0],
2719 v->BytePerPixelDETY[k],
2720 v->BytePerPixelDETC[k],
2721 v->DETBufferSizeY[k],
2722 v->DETBufferSizeC[k],
2723 &v->UrgBurstFactorCursor[k],
2724 &v->UrgBurstFactorLuma[k],
2725 &v->UrgBurstFactorChroma[k],
2726 &v->NoUrgentLatencyHiding[k]);
2728 CalculateUrgentBurstFactor(
2729 v->swath_width_luma_ub[k],
2730 v->swath_width_chroma_ub[k],
2733 v->HTotal[k] / v->PixelClock[k],
2735 v->CursorBufferSize,
2736 v->CursorWidth[k][0],
2738 v->VRatioPrefetchY[k],
2739 v->VRatioPrefetchC[k],
2740 v->BytePerPixelDETY[k],
2741 v->BytePerPixelDETC[k],
2742 v->DETBufferSizeY[k],
2743 v->DETBufferSizeC[k],
2744 &v->UrgBurstFactorCursorPre[k],
2745 &v->UrgBurstFactorLumaPre[k],
2746 &v->UrgBurstFactorChromaPre[k],
2747 &v->NoUrgentLatencyHidingPre[k]);
2749 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2751 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2752 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2753 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2754 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2755 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2757 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2758 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2759 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2761 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2763 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2764 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2765 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2766 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2767 + v->cursor_bw_pre[k]);
2769 #ifdef __DML_VBA_DEBUG__
2770 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2771 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2772 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2773 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2774 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2776 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2777 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2779 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2780 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2781 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2782 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2783 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2784 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2785 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2786 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2787 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2788 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2791 if (v->DestinationLinesForPrefetch[k] < 2)
2792 DestinationLineTimesForPrefetchLessThan2 = true;
2794 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2795 VRatioPrefetchMoreThan4 = true;
2797 if (v->NoUrgentLatencyHiding[k] == true)
2798 v->NoEnoughUrgentLatencyHiding = true;
2800 if (v->NoUrgentLatencyHidingPre[k] == true)
2801 v->NoEnoughUrgentLatencyHidingPre = true;
2804 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2806 #ifdef __DML_VBA_DEBUG__
2807 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2808 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2809 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2812 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2813 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2814 v->PrefetchModeSupported = true;
2816 v->PrefetchModeSupported = false;
2817 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2818 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2819 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2820 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2824 // This error result check is done after PrefetchModeSupported has been evaluated, so we will
2825 // still try to calculate the flip schedule even if the prefetch mode is not supported
2826 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2827 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2828 v->PrefetchModeSupported = false;
2829 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2833 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2834 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2835 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2836 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2838 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2839 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2840 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2842 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2843 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2844 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2847 v->TotImmediateFlipBytes = 0;
2848 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2849 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2850 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2852 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2853 CalculateFlipSchedule(
2856 HostVMInefficiencyFactor,
2857 v->UrgentExtraLatency,
2859 v->PDEAndMetaPTEBytesFrame[k],
2861 v->PixelPTEBytesPerRow[k]);
2864 v->total_dcn_read_bw_with_flip = 0.0;
2865 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2866 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2867 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2869 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2870 v->DPPPerPlane[k] * v->final_flip_bw[k]
2871 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2872 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2873 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2875 * (v->final_flip_bw[k]
2876 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2877 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2878 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2879 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2881 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2882 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2883 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2885 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2886 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2888 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2890 v->ImmediateFlipSupported = true;
2891 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2892 #ifdef __DML_VBA_DEBUG__
2893 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2895 v->ImmediateFlipSupported = false;
2896 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2898 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2899 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2900 #ifdef __DML_VBA_DEBUG__
2901 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2904 v->ImmediateFlipSupported = false;
2908 v->ImmediateFlipSupported = false;
2911 v->PrefetchAndImmediateFlipSupported =
2912 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2913 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2914 v->ImmediateFlipSupported)) ? true : false;
2915 #ifdef __DML_VBA_DEBUG__
2916 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2917 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2918 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2919 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2920 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2921 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2923 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2925 v->VStartupLines = v->VStartupLines + 1;
2926 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2927 ASSERT(v->PrefetchAndImmediateFlipSupported);
2929 // Unbounded Request Enabled
2930 CalculateUnboundedRequestAndCompressedBufferSize(
2931 v->DETBufferSizeInKByte[0],
2932 v->ConfigReturnBufferSizeInKByte,
2933 v->UseUnboundedRequesting,
2937 v->CompressedBufferSegmentSizeInkByte,
2939 &v->UnboundedRequestEnabled,
2940 &v->CompressedBufferSizeInkByte);
2942 //Watermarks and NB P-State/DRAM Clock Change Support
2944 enum clock_change_support DRAMClockChangeSupport; // dummy
2945 CalculateWatermarksAndDRAMSpeedChangeSupport(
2951 v->UrgentExtraLatency,
2961 v->BytePerPixelDETY,
2962 v->BytePerPixelDETC,
2963 v->UnboundedRequestEnabled,
2964 v->CompressedBufferSizeInkByte,
2965 &DRAMClockChangeSupport,
2966 &v->StutterExitWatermark,
2967 &v->StutterEnterPlusExitWatermark,
2968 &v->Z8StutterExitWatermark,
2969 &v->Z8StutterEnterPlusExitWatermark);
2971 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2972 if (v->WritebackEnable[k] == true) {
2973 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2975 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2977 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2982 //Display Pipeline Delivery Time in Prefetch, Groups
2983 CalculatePixelDeliveryTimes(
2984 v->NumberOfActivePlanes,
2989 v->swath_width_luma_ub,
2990 v->swath_width_chroma_ub,
2995 v->PSCL_THROUGHPUT_LUMA,
2996 v->PSCL_THROUGHPUT_CHROMA,
3003 v->BlockWidth256BytesY,
3004 v->BlockHeight256BytesY,
3005 v->BlockWidth256BytesC,
3006 v->BlockHeight256BytesC,
3007 v->DisplayPipeLineDeliveryTimeLuma,
3008 v->DisplayPipeLineDeliveryTimeChroma,
3009 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3010 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3011 v->DisplayPipeRequestDeliveryTimeLuma,
3012 v->DisplayPipeRequestDeliveryTimeChroma,
3013 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3014 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3015 v->CursorRequestDeliveryTime,
3016 v->CursorRequestDeliveryTimePrefetch);
3018 CalculateMetaAndPTETimes(
3019 v->NumberOfActivePlanes,
3022 v->MinMetaChunkSizeBytes,
3026 v->DestinationLinesToRequestRowInVBlank,
3027 v->DestinationLinesToRequestRowInImmediateFlip,
3034 v->dpte_row_height_chroma,
3036 v->meta_row_width_chroma,
3038 v->meta_row_height_chroma,
3040 v->meta_req_width_chroma,
3042 v->meta_req_height_chroma,
3043 v->dpte_group_bytes,
3046 v->PixelPTEReqWidthY,
3047 v->PixelPTEReqHeightY,
3048 v->PixelPTEReqWidthC,
3049 v->PixelPTEReqHeightC,
3050 v->dpte_row_width_luma_ub,
3051 v->dpte_row_width_chroma_ub,
3052 v->DST_Y_PER_PTE_ROW_NOM_L,
3053 v->DST_Y_PER_PTE_ROW_NOM_C,
3054 v->DST_Y_PER_META_ROW_NOM_L,
3055 v->DST_Y_PER_META_ROW_NOM_C,
3056 v->TimePerMetaChunkNominal,
3057 v->TimePerChromaMetaChunkNominal,
3058 v->TimePerMetaChunkVBlank,
3059 v->TimePerChromaMetaChunkVBlank,
3060 v->TimePerMetaChunkFlip,
3061 v->TimePerChromaMetaChunkFlip,
3062 v->time_per_pte_group_nom_luma,
3063 v->time_per_pte_group_vblank_luma,
3064 v->time_per_pte_group_flip_luma,
3065 v->time_per_pte_group_nom_chroma,
3066 v->time_per_pte_group_vblank_chroma,
3067 v->time_per_pte_group_flip_chroma);
3069 CalculateVMGroupAndRequestTimes(
3070 v->NumberOfActivePlanes,
3072 v->GPUVMMaxPageTableLevels,
3075 v->DestinationLinesToRequestVMInVBlank,
3076 v->DestinationLinesToRequestVMInImmediateFlip,
3079 v->dpte_row_width_luma_ub,
3080 v->dpte_row_width_chroma_ub,
3082 v->dpde0_bytes_per_frame_ub_l,
3083 v->dpde0_bytes_per_frame_ub_c,
3084 v->meta_pte_bytes_per_frame_ub_l,
3085 v->meta_pte_bytes_per_frame_ub_c,
3086 v->TimePerVMGroupVBlank,
3087 v->TimePerVMGroupFlip,
3088 v->TimePerVMRequestVBlank,
3089 v->TimePerVMRequestFlip);
3092 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3093 if (PrefetchMode == 0) {
3094 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3095 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3096 v->MinTTUVBlank[k] = dml_max(
3097 v->DRAMClockChangeWatermark,
3098 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3099 } else if (PrefetchMode == 1) {
3100 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3101 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3102 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3104 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3105 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3106 v->MinTTUVBlank[k] = v->UrgentWatermark;
3108 if (!v->DynamicMetadataEnable[k])
3109 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3112 // DCC Configuration
3114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3115 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3116 v->SourcePixelFormat[k],
3117 v->SurfaceWidthY[k],
3118 v->SurfaceWidthC[k],
3119 v->SurfaceHeightY[k],
3120 v->SurfaceHeightC[k],
3121 v->DETBufferSizeInKByte[0] * 1024,
3122 v->BlockHeight256BytesY[k],
3123 v->BlockHeight256BytesC[k],
3124 v->SurfaceTiling[k],
3125 v->BytePerPixelY[k],
3126 v->BytePerPixelC[k],
3127 v->BytePerPixelDETY[k],
3128 v->BytePerPixelDETC[k],
3130 &v->DCCYMaxUncompressedBlock[k],
3131 &v->DCCCMaxUncompressedBlock[k],
3132 &v->DCCYMaxCompressedBlock[k],
3133 &v->DCCCMaxCompressedBlock[k],
3134 &v->DCCYIndependentBlock[k],
3135 &v->DCCCIndependentBlock[k]);
3138 // VStartup Adjustment
3139 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3140 bool isInterlaceTiming;
3141 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3142 #ifdef __DML_VBA_DEBUG__
3143 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3146 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3148 #ifdef __DML_VBA_DEBUG__
3149 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3150 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3151 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3152 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3155 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3156 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3157 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3160 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3162 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3163 - v->VFrontPorch[k])
3164 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3165 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3167 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3169 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3170 <= (isInterlaceTiming ?
3171 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3172 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3173 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3175 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3177 #ifdef __DML_VBA_DEBUG__
3178 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3179 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3180 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3181 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3182 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3183 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3184 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3185 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3186 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3187 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3188 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3193 //Maximum Bandwidth Used
3194 double TotalWRBandwidth = 0;
3195 double MaxPerPlaneVActiveWRBandwidth = 0;
3196 double WRBandwidth = 0;
3197 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3198 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3199 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3200 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3201 } else if (v->WritebackEnable[k] == true) {
3202 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3203 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3205 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3206 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3209 v->TotalDataReadBandwidth = 0;
3210 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3211 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3214 // Stutter Efficiency
3215 CalculateStutterEfficiency(
3217 v->CompressedBufferSizeInkByte,
3218 v->UnboundedRequestEnabled,
3219 v->ConfigReturnBufferSizeInKByte,
3220 v->MetaFIFOSizeInKEntries,
3221 v->ZeroSizeBufferEntries,
3222 v->NumberOfActivePlanes,
3223 v->ROBBufferSizeInKByte,
3224 v->TotalDataReadBandwidth,
3227 v->COMPBUF_RESERVED_SPACE_64B,
3228 v->COMPBUF_RESERVED_SPACE_ZS,
3231 v->SynchronizedVBlank,
3232 v->StutterEnterPlusExitWatermark,
3233 v->Z8StutterEnterPlusExitWatermark,
3234 v->ProgressiveToInterlaceUnitInOPP,
3240 v->BytePerPixelDETY,
3246 v->DCCFractionOfZeroSizeRequestsLuma,
3247 v->DCCFractionOfZeroSizeRequestsChroma,
3253 v->BlockHeight256BytesY,
3254 v->BlockWidth256BytesY,
3255 v->BlockHeight256BytesC,
3256 v->BlockWidth256BytesC,
3257 v->DCCYMaxUncompressedBlock,
3258 v->DCCCMaxUncompressedBlock,
3262 v->ReadBandwidthPlaneLuma,
3263 v->ReadBandwidthPlaneChroma,
3266 &v->StutterEfficiencyNotIncludingVBlank,
3267 &v->StutterEfficiency,
3268 &v->NumberOfStutterBurstsPerFrame,
3269 &v->Z8StutterEfficiencyNotIncludingVBlank,
3270 &v->Z8StutterEfficiency,
3271 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * Display pipe configuration step.
 *
 * For every active plane (v->NumberOfActivePlanes) this calls
 * dml30_CalculateBytePerPixelAnd256BBlockSizes() with the plane's source
 * pixel format and surface tiling; the 256-byte block height/width results
 * are stored in the local Read256BytesBlock* arrays. Those arrays are then
 * passed, together with v->DETBufferSizeInKByte[0], v->SourcePixelFormat,
 * v->ODMCombineEnabled and v->BlendingAndTiming, to
 * CalculateSwathAndDETConfiguration().
 *
 * All local arrays are sized DC__NUM_DPP__MAX.
 *
 * NOTE(review): the dummy* locals and dummysinglestring presumably only
 * absorb outputs this caller does not need - confirm against the callee.
 */
3275 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3277 struct vba_vars_st *v = &mode_lib->vba;
3278 // Display Pipe Configuration
3279 double BytePerPixDETY[DC__NUM_DPP__MAX];
3280 double BytePerPixDETC[DC__NUM_DPP__MAX];
3281 int BytePerPixY[DC__NUM_DPP__MAX];
3282 int BytePerPixC[DC__NUM_DPP__MAX];
3283 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3284 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3285 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3286 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3287 double dummy1[DC__NUM_DPP__MAX];
3288 double dummy2[DC__NUM_DPP__MAX];
3289 double dummy3[DC__NUM_DPP__MAX];
3290 double dummy4[DC__NUM_DPP__MAX];
3291 int dummy5[DC__NUM_DPP__MAX];
3292 int dummy6[DC__NUM_DPP__MAX];
3293 bool dummy7[DC__NUM_DPP__MAX];
3294 bool dummysinglestring;
// Per-plane 256-byte block geometry, one call per active plane.
3298 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3300 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3301 v->SourcePixelFormat[k],
3302 v->SurfaceTiling[k],
3307 &Read256BytesBlockHeightY[k],
3308 &Read256BytesBlockHeightC[k],
3309 &Read256BytesBlockWidthY[k],
3310 &Read256BytesBlockWidthC[k]);
// Single call covering all planes, fed with the block geometry gathered above.
3313 CalculateSwathAndDETConfiguration(
3315 v->NumberOfActivePlanes,
3316 v->DETBufferSizeInKByte[0],
3320 v->SourcePixelFormat,
3328 Read256BytesBlockHeightY,
3329 Read256BytesBlockHeightC,
3330 Read256BytesBlockWidthY,
3331 Read256BytesBlockWidthC,
3332 v->ODMCombineEnabled,
3333 v->BlendingAndTiming,
3351 &dummysinglestring);
/*
 * Select the wait time that corresponds to the requested prefetch mode.
 *
 * PrefetchMode 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * Any other mode: UrgentLatency on its own.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
/*
 * dml31_CalculateWriteBackDISPCLK - DISPCLK demanded by the writeback path.
 *
 * Three candidates are evaluated and the largest one is returned (dml_max3):
 *   DISPCLK_H  = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio
 *   DISPCLK_V  = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0)
 *                / HTotal
 *   DISPCLK_HB = PixelClock * WritebackVTaps
 *                * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0)
 *                / 6.0 / WritebackSourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio do not take part in the
 * computation. WritebackHRatio, HTotal and WritebackSourceWidth are used as
 * divisors and are not checked against zero here.
 */
3365 double dml31_CalculateWriteBackDISPCLK(
3366 enum source_format_class WritebackPixelFormat,
3368 double WritebackHRatio,
3369 double WritebackVRatio,
3370 unsigned int WritebackHTaps,
3371 unsigned int WritebackVTaps,
3372 long WritebackSourceWidth,
3373 long WritebackDestinationWidth,
3374 unsigned int HTotal,
3375 unsigned int WritebackLineBufferSize)
3377 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3379 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3380 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3381 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3382 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3385 static double CalculateWriteBackDelay(
3386 enum source_format_class WritebackPixelFormat,
3387 double WritebackHRatio,
3388 double WritebackVRatio,
3389 unsigned int WritebackVTaps,
3390 int WritebackDestinationWidth,
3391 int WritebackDestinationHeight,
3392 int WritebackSourceHeight,
3393 unsigned int HTotal)
3395 double CalculateWriteBackDelay;
3397 double Output_lines_last_notclamped;
3398 double WritebackVInit;
3400 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3401 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3402 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3403 if (Output_lines_last_notclamped < 0) {
3404 CalculateWriteBackDelay = 0;
3406 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3408 return CalculateWriteBackDelay;
/*
 * Derive the VUPDATE / VREADY pixel values and the dynamic metadata timing
 * values that are returned through the output pointers.
 *
 *   TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK)
 *   *VUpdateWidthPix  = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK
 *                                 + TotalRepeaterDelayTime) * PixelClock, 1.0)
 *   *VReadyOffsetPix  = dml_ceil(dml_max(150.0 / DPPCLK,
 *                                 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep
 *                                 + 10.0 / DPPCLK) * PixelClock, 1.0)
 *   *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1)
 *   *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock
 *   *Tdmbf  = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK
 *   *Tdmec  = HTotal / PixelClock
 *   *Tdmsks = VBlank * HTotal / PixelClock / 2.0 when
 *             DynamicMetadataLinesBeforeActiveRequired == 0; the non-zero
 *             case uses DynamicMetadataLinesBeforeActiveRequired * HTotal
 *             / PixelClock. The value is halved when InterlaceEnable == 1
 *             and ProgressiveToInterlaceUnitInOPP is false.
 *
 * DPPCLK, DISPCLK, DCFClkDeepSleep and PixelClock are divisors and are not
 * checked against zero here.
 */
3411 static void CalculateVupdateAndDynamicMetadataParameters(
3412 int MaxInterDCNTileRepeaters,
3415 double DCFClkDeepSleep,
3419 int DynamicMetadataTransmittedBytes,
3420 int DynamicMetadataLinesBeforeActiveRequired,
3421 int InterlaceEnable,
3422 bool ProgressiveToInterlaceUnitInOPP,
3427 int *VUpdateOffsetPix,
3428 double *VUpdateWidthPix,
3429 double *VReadyOffsetPix)
3431 double TotalRepeaterDelayTime;
3433 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3434 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3435 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
3436 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3437 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3438 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3439 *Tdmec = HTotal / PixelClock;
3440 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3441 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3443 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3445 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3446 *Tdmsks = *Tdmsks / 2;
// NOTE(review): VUpdateWidthPix and VReadyOffsetPix are declared double * in
// the parameter list but are printed with %d below; if dml_print follows
// printf rules the specifier should presumably be %f - confirm.
3448 #ifdef __DML_VBA_DEBUG__
3449 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix);
3450 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix);
3451 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * Compute the meta row and dpte row bandwidths, returned through
 * *meta_row_bw and *dpte_row_bw.
 *
 * For the source formats dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha a
 * luma term and a chroma term are added:
 *   VRatio * <luma bytes per row> / (<luma row height> * LineTime)
 *   + VRatioChroma * <chroma bytes per row> / (<chroma row height> * LineTime)
 * Every other format uses the luma term only.
 *
 * The meta row result uses MetaRowByte* / meta_row_height_*; the dpte row
 * result uses PixelPTEBytesPerRow* / dpte_row_height_*.
 *
 * NOTE(review): the DCCEnable != true and GPUVMEnable != true branches
 * presumably set the matching output to 0 - confirm. Row heights and
 * LineTime are divisors and are not checked against zero here.
 */
3455 static void CalculateRowBandwidth(
3457 enum source_format_class SourcePixelFormat,
3459 double VRatioChroma,
3462 unsigned int MetaRowByteLuma,
3463 unsigned int MetaRowByteChroma,
3464 unsigned int meta_row_height_luma,
3465 unsigned int meta_row_height_chroma,
3466 unsigned int PixelPTEBytesPerRowLuma,
3467 unsigned int PixelPTEBytesPerRowChroma,
3468 unsigned int dpte_row_height_luma,
3469 unsigned int dpte_row_height_chroma,
3470 double *meta_row_bw,
3471 double *dpte_row_bw)
// Meta row bandwidth (only relevant with DCC).
3473 if (DCCEnable != true) {
3475 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3476 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3478 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// DPTE row bandwidth (only relevant with GPUVM).
3481 if (GPUVMEnable != true) {
3483 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3484 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3485 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3487 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * Work out the immediate flip schedule for plane k.
 *
 * Results are written to
 *   v->DestinationLinesToRequestVMInImmediateFlip[k],
 *   v->DestinationLinesToRequestRowInImmediateFlip[k],
 *   v->final_flip_bw[k] and
 *   v->ImmediateFlipSupportedForPipe[k].
 *
 * Outline:
 *  - LineTime is v->HTotal[k] / v->PixelClock[k].
 *  - HostVMDynamicLevelsTrips is v->HostVMMaxNonCachedPageTableLevels when
 *    both GPUVM and HostVM are enabled, 0 otherwise.
 *  - With GPUVM or DCC enabled, the plane's ImmediateFlipBW is its
 *    (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) share of
 *    v->BandwidthAvailableForImmediateFlip, relative to
 *    v->TotImmediateFlipBytes.
 *  - The meta PTE and row fetch times are converted to destination lines
 *    with dml_ceil(4.0 * (time / LineTime), 1) / 4.0.
 *  - final_flip_bw is derived from the byte counts and those line counts.
 *  - min_row_time is the smallest dpte/meta row height * LineTime / VRatio
 *    term that applies for the enabled features (chroma terms included for
 *    the formats listed in the check below).
 *  - The pipe is marked as not supporting immediate flip when the VM line
 *    count is >= 32, the row line count is >= 16, or
 *    TimeForFetchingMetaPTEImmediateFlip
 *    + 2 * TimeForFetchingRowInVBlankImmediateFlip exceeds min_row_time.
 */
3491 static void CalculateFlipSchedule(
3492 struct display_mode_lib *mode_lib,
3494 double HostVMInefficiencyFactor,
3495 double UrgentExtraLatency,
3496 double UrgentLatency,
3497 double PDEAndMetaPTEBytesPerFrame,
3498 double MetaRowBytes,
3499 double DPTEBytesPerRow)
3501 struct vba_vars_st *v = &mode_lib->vba;
3502 double min_row_time = 0.0;
3503 unsigned int HostVMDynamicLevelsTrips;
3504 double TimeForFetchingMetaPTEImmediateFlip;
3505 double TimeForFetchingRowInVBlankImmediateFlip;
3506 double ImmediateFlipBW;
3507 double LineTime = v->HTotal[k] / v->PixelClock[k];
3509 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3510 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3512 HostVMDynamicLevelsTrips = 0;
// NOTE(review): the share below divides by v->TotImmediateFlipBytes and
// nothing in this function guards against it being 0 - confirm the caller
// guarantees a non-zero total whenever GPUVM or DCC is enabled.
3515 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3516 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3519 if (v->GPUVMEnable == true) {
3520 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3521 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3522 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3525 TimeForFetchingMetaPTEImmediateFlip = 0;
3528 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3529 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3530 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3531 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3532 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3535 TimeForFetchingRowInVBlankImmediateFlip = 0;
3538 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3540 if (v->GPUVMEnable == true) {
3541 v->final_flip_bw[k] = dml_max(
3542 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3543 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3544 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3545 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3547 v->final_flip_bw[k] = 0;
// NOTE(review): dm_420_12 is not part of this format list although
// CalculateRowBandwidth() groups it with dm_420_8 / dm_420_10 - confirm
// whether leaving it out here is intended.
3550 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3551 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3552 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3553 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3554 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3556 min_row_time = dml_min4(
3557 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3558 v->meta_row_height[k] * LineTime / v->VRatio[k],
3559 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3560 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3563 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3564 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3565 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3566 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3568 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
// Final verdict for this pipe: line-count limits plus the row-time budget.
3572 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3573 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3574 v->ImmediateFlipSupportedForPipe[k] = false;
3576 v->ImmediateFlipSupportedForPipe[k] = true;
3579 #ifdef __DML_VBA_DEBUG__
3580 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3581 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3582 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3583 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3584 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3585 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
/*
 * Validate a requested output bits-per-pixel value, or pick one when
 * DesiredBPP is 0.
 *
 * Steps that can be read from the code:
 *  - MaxDSCBPP is derived from DSCInputBitPerComponent depending on Format:
 *    a 1.5x factor for dm_420, 3x for dm_444, 2x for dm_n422, and 3x in the
 *    remaining assignment.
 *  - MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock, scaled by
 *    (1 - 2.4 / 100) when DSC is enabled on a dm_dp output.
 *  - For ODM combine 4:1 and 2:1, MaxLinkBPP is compared against 16 and 32
 *    respectively.
 *  - DesiredBPP == 0: the result is chosen from MaxLinkBPP relative to
 *    MinDSCBPP / MaxDSCBPP (one outcome being MaxLinkBPP floored to a
 *    multiple of 1/16) or relative to NonDSCBPP2 / NonDSCBPP1 / NonDSCBPP0.
 *  - DesiredBPP != 0: it is accepted only if, without DSC, it equals
 *    NonDSCBPP2 or NonDSCBPP1 or is <= NonDSCBPP0, or, with DSC, it lies in
 *    [MinDSCBPP, MaxDSCBPP].
 */
3590 static double TruncToValidBPP(
3598 enum output_encoder_class Output,
3599 enum output_format_class Format,
3600 unsigned int DSCInputBitPerComponent,
3604 enum odm_combine_mode ODMCombine)
3613 if (Format == dm_420) {
// NOTE(review): "1 / 16" below is integer division and evaluates to 0, so
// nothing is subtracted for dm_420, unlike the "1.0 / 16" used for the
// other formats - confirm whether 1.0 / 16 was intended.
3618 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3619 } else if (Format == dm_444) {
3624 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3631 if (Format == dm_n422) {
3633 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3636 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3640 if (DSCEnable && Output == dm_dp) {
3641 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3643 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3646 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3648 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3652 if (DesiredBPP == 0) {
3654 if (MaxLinkBPP < MinDSCBPP) {
3656 } else if (MaxLinkBPP >= MaxDSCBPP) {
3659 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3662 if (MaxLinkBPP >= NonDSCBPP2) {
3664 } else if (MaxLinkBPP >= NonDSCBPP1) {
3666 } else if (MaxLinkBPP >= NonDSCBPP0) {
3673 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3674 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * Run CalculatePrefetchSchedule() for a single plane k in mode-support
 * state (i, j).
 *
 * A myPipe parameter set is filled from the per-state and per-plane
 * values in mode_lib->vba (clocks, scaler/DCC flags, VRatio, 256-byte block
 * sizes, interlace, cursor count, timing, pixel format and bytes per
 * pixel):
 *  - myPipe.VBlank is VTotal[k] - VActive[k];
 *  - myPipe.ODMCombineIsEnabled is set when ODMCombineEnablePerState[i][k]
 *    is dm_odm_combine_mode_4to1 or dm_odm_combine_mode_2to1.
 *
 * The return value of CalculatePrefetchSchedule() is stored in
 * v->NoTimeForPrefetch[i][j][k]; further outputs are written through the
 * pointer arguments (DSTXAfterScaler, DSTYAfterScaler, LineTimesForPrefetch,
 * VRatioPreY/C, RequiredPrefetchPixelDataBWLuma/Chroma,
 * NoTimeForDynamicMetadata, prefetch_vmrow_bw, VUpdate*/VReady* pixels, ...).
 *
 * The maximum vstartup argument is dml_min(v->MaxVStartup,
 * v->MaximumVStartup[i][j][k]).
 *
 * NOTE(review): i, j and k are presumably the voltage state, the
 * MPC-combine option and the plane index - confirm against the caller.
 */
3683 static noinline void CalculatePrefetchSchedulePerPlane(
3684 struct display_mode_lib *mode_lib,
3685 double HostVMInefficiencyFactor,
3690 struct vba_vars_st *v = &mode_lib->vba;
3693 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3694 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3695 myPipe.PixelClock = v->PixelClock[k];
3696 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3697 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3698 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3699 myPipe.VRatio = mode_lib->vba.VRatio[k];
3700 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3702 myPipe.SourceScan = v->SourceScan[k];
3703 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3704 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3705 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3706 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3707 myPipe.InterlaceEnable = v->Interlace[k];
3708 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3709 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3710 myPipe.HTotal = v->HTotal[k];
3711 myPipe.DCCEnable = v->DCCEnable[k];
3712 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3713 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3714 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3715 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3716 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3717 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
// The call's return value is recorded per (i, j, k) in NoTimeForPrefetch.
3718 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3720 HostVMInefficiencyFactor,
3722 v->DSCDelayPerState[i][k],
3723 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3725 v->DPPCLKDelaySCLLBOnly,
3726 v->DPPCLKDelayCNVCCursor,
3727 v->DISPCLKDelaySubtotal,
3728 v->SwathWidthYThisState[k] / v->HRatio[k],
3730 v->MaxInterDCNTileRepeaters,
3731 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3732 v->MaximumVStartup[i][j][k],
3733 v->GPUVMMaxPageTableLevels,
3736 v->HostVMMaxNonCachedPageTableLevels,
3737 v->HostVMMinPageSize,
3738 v->DynamicMetadataEnable[k],
3739 v->DynamicMetadataVMEnabled,
3740 v->DynamicMetadataLinesBeforeActiveRequired[k],
3741 v->DynamicMetadataTransmittedBytes[k],
3745 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3746 v->MetaRowBytes[i][j][k],
3747 v->DPTEBytesPerRow[i][j][k],
3748 v->PrefetchLinesY[i][j][k],
3749 v->SwathWidthYThisState[k],
3752 v->PrefetchLinesC[i][j][k],
3753 v->SwathWidthCThisState[k],
3756 v->swath_width_luma_ub_this_state[k],
3757 v->swath_width_chroma_ub_this_state[k],
3758 v->SwathHeightYThisState[k],
3759 v->SwathHeightCThisState[k],
3761 &v->DSTXAfterScaler[k],
3762 &v->DSTYAfterScaler[k],
3763 &v->LineTimesForPrefetch[k],
3765 &v->LinesForMetaPTE[k],
3766 &v->LinesForMetaAndDPTERow[k],
3767 &v->VRatioPreY[i][j][k],
3768 &v->VRatioPreC[i][j][k],
3769 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3770 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3771 &v->NoTimeForDynamicMetadata[i][j][k],
3773 &v->prefetch_vmrow_bw[k],
3777 &v->VUpdateOffsetPix[k],
3778 &v->VUpdateWidthPix[k],
3779 &v->VReadyOffsetPix[k]);
3782 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte)
3784 int i, total_pipes = 0;
3785 for (i = 0; i < NumberOfActivePlanes; i++)
3786 total_pipes += NoOfDPPThisState[i];
3787 *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3788 if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE)
3789 *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE;
3793 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3795 struct vba_vars_st *v = &mode_lib->vba;
3799 int ReorderingBytes;
3800 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3801 bool NoChroma = true;
3802 bool EnoughWritebackUnits = true;
3803 bool P2IWith420 = false;
3804 bool DSCOnlyIfNecessaryWithBPP = false;
3805 bool DSC422NativeNotSupported = false;
3806 double MaxTotalVActiveRDBandwidth;
3807 bool ViewportExceedsSurface = false;
3808 bool FMTBufferExceeded = false;
3810 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3812 CalculateMinAndMaxPrefetchMode(
3813 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3814 &MinPrefetchMode, &MaxPrefetchMode);
3816 /*Scale Ratio, taps Support Check*/
3818 v->ScaleRatioAndTapsSupport = true;
3819 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3820 if (v->ScalerEnabled[k] == false
3821 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3822 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3823 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3824 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3825 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3826 v->ScaleRatioAndTapsSupport = false;
3827 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3828 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3829 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3830 || v->VRatio[k] > v->vtaps[k]
3831 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3832 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3833 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3834 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3835 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3836 || v->HRatioChroma[k] > v->MaxHSCLRatio
3837 || v->VRatioChroma[k] > v->MaxVSCLRatio
3838 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3839 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3840 v->ScaleRatioAndTapsSupport = false;
3843 /*Source Format, Pixel Format and Scan Support Check*/
3845 v->SourceFormatPixelAndScanSupport = true;
3846 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3847 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3848 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3849 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3850 v->SourceFormatPixelAndScanSupport = false;
3853 /*Bandwidth Support Check*/
3855 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3856 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3857 v->SourcePixelFormat[k],
3858 v->SurfaceTiling[k],
3859 &v->BytePerPixelY[k],
3860 &v->BytePerPixelC[k],
3861 &v->BytePerPixelInDETY[k],
3862 &v->BytePerPixelInDETC[k],
3863 &v->Read256BlockHeightY[k],
3864 &v->Read256BlockHeightC[k],
3865 &v->Read256BlockWidthY[k],
3866 &v->Read256BlockWidthC[k]);
3868 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3869 if (v->SourceScan[k] != dm_vert) {
3870 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3871 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3873 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3874 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3877 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3878 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3879 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3880 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3881 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3883 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3884 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3885 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3886 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3887 } else if (v->WritebackEnable[k] == true) {
3888 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3889 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3891 v->WriteBandwidth[k] = 0.0;
3895 /*Writeback Latency support check*/
3897 v->WritebackLatencySupport = true;
3898 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3899 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3900 v->WritebackLatencySupport = false;
3904 /*Writeback Mode Support Check*/
3906 v->TotalNumberOfActiveWriteback = 0;
3907 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3908 if (v->WritebackEnable[k] == true) {
3909 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3913 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3914 EnoughWritebackUnits = false;
3917 /*Writeback Scale Ratio and Taps Support Check*/
3919 v->WritebackScaleRatioAndTapsSupport = true;
3920 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3921 if (v->WritebackEnable[k] == true) {
3922 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3923 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3924 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3925 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3926 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3927 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3928 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3929 v->WritebackScaleRatioAndTapsSupport = false;
3931 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3932 v->WritebackScaleRatioAndTapsSupport = false;
3936 /*Maximum DISPCLK/DPPCLK Support check*/
3938 v->WritebackRequiredDISPCLK = 0.0;
3939 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3940 if (v->WritebackEnable[k] == true) {
3941 v->WritebackRequiredDISPCLK = dml_max(
3942 v->WritebackRequiredDISPCLK,
3943 dml31_CalculateWriteBackDISPCLK(
3944 v->WritebackPixelFormat[k],
3946 v->WritebackHRatio[k],
3947 v->WritebackVRatio[k],
3948 v->WritebackHTaps[k],
3949 v->WritebackVTaps[k],
3950 v->WritebackSourceWidth[k],
3951 v->WritebackDestinationWidth[k],
3953 v->WritebackLineBufferSize));
3956 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3957 if (v->HRatio[k] > 1.0) {
3958 v->PSCL_FACTOR[k] = dml_min(
3959 v->MaxDCHUBToPSCLThroughput,
3960 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3962 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3964 if (v->BytePerPixelC[k] == 0.0) {
3965 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3966 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3968 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3969 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3971 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3972 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3975 if (v->HRatioChroma[k] > 1.0) {
3976 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3977 v->MaxDCHUBToPSCLThroughput,
3978 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3980 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3982 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3984 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3985 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3986 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3987 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3989 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3990 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3991 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3995 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3996 int MaximumSwathWidthSupportLuma;
3997 int MaximumSwathWidthSupportChroma;
3999 if (v->SurfaceTiling[k] == dm_sw_linear) {
4000 MaximumSwathWidthSupportLuma = 8192.0;
4001 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4002 MaximumSwathWidthSupportLuma = 2880.0;
4003 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4004 MaximumSwathWidthSupportLuma = 3840.0;
4006 MaximumSwathWidthSupportLuma = 5760.0;
4009 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4010 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4012 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4014 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4015 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4016 if (v->BytePerPixelC[k] == 0.0) {
4017 v->MaximumSwathWidthInLineBufferChroma = 0;
4019 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4020 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4022 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4023 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4026 CalculateSwathAndDETConfiguration(
4028 v->NumberOfActivePlanes,
4029 v->DETBufferSizeInKByte[0],
4030 v->MaximumSwathWidthLuma,
4031 v->MaximumSwathWidthChroma,
4033 v->SourcePixelFormat,
4041 v->Read256BlockHeightY,
4042 v->Read256BlockHeightC,
4043 v->Read256BlockWidthY,
4044 v->Read256BlockWidthC,
4045 v->odm_combine_dummy,
4046 v->BlendingAndTiming,
4049 v->BytePerPixelInDETY,
4050 v->BytePerPixelInDETC,
4054 v->NoOfDPPThisState,
4055 v->swath_width_luma_ub_this_state,
4056 v->swath_width_chroma_ub_this_state,
4057 v->SwathWidthYThisState,
4058 v->SwathWidthCThisState,
4059 v->SwathHeightYThisState,
4060 v->SwathHeightCThisState,
4061 v->DETBufferSizeYThisState,
4062 v->DETBufferSizeCThisState,
4063 v->SingleDPPViewportSizeSupportPerPlane,
4064 &v->ViewportSizeSupport[0][0]);
4066 for (i = 0; i < v->soc.num_states; i++) {
4067 for (j = 0; j < 2; j++) {
4068 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4069 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4070 v->RequiredDISPCLK[i][j] = 0.0;
4071 v->DISPCLK_DPPCLK_Support[i][j] = true;
4072 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4073 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4074 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4075 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4076 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4077 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4078 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4079 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4081 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4082 * (1 + v->DISPCLKRampingMargin / 100.0);
4083 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4084 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4085 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4086 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4087 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4089 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4090 * (1 + v->DISPCLKRampingMargin / 100.0);
4091 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4092 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4093 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4094 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4095 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4098 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4099 || !(v->Output[k] == dm_dp ||
4100 v->Output[k] == dm_dp2p0 ||
4101 v->Output[k] == dm_edp)) {
4102 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4103 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4105 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4106 FMTBufferExceeded = true;
4107 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4108 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4109 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4110 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4111 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4112 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4113 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4114 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4115 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4116 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4118 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4119 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4121 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4122 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4123 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4124 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4125 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4127 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4128 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4131 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4132 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4133 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4134 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4135 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4137 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4138 FMTBufferExceeded = true;
4140 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4141 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4144 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4145 v->MPCCombine[i][j][k] = false;
4146 v->NoOfDPP[i][j][k] = 4;
4147 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4148 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4149 v->MPCCombine[i][j][k] = false;
4150 v->NoOfDPP[i][j][k] = 2;
4151 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4152 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4153 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4154 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4155 v->MPCCombine[i][j][k] = false;
4156 v->NoOfDPP[i][j][k] = 1;
4157 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4159 v->MPCCombine[i][j][k] = true;
4160 v->NoOfDPP[i][j][k] = 2;
4161 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4163 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4164 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4165 > v->MaxDppclkRoundedDownToDFSGranularity)
4166 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4167 v->DISPCLK_DPPCLK_Support[i][j] = false;
4170 v->TotalNumberOfActiveDPP[i][j] = 0;
4171 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4172 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4173 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4174 if (v->NoOfDPP[i][j][k] == 1)
4175 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4176 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4177 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4182 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4183 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4184 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4185 double BWOfNonSplitPlaneOfMaximumBandwidth;
4186 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4187 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4188 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4189 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4190 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4191 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4192 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4193 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4196 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4197 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4198 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4199 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4200 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4201 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4202 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4205 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4206 v->RequiredDISPCLK[i][j] = 0.0;
4207 v->DISPCLK_DPPCLK_Support[i][j] = true;
4208 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4210 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4211 v->MPCCombine[i][j][k] = true;
4212 v->NoOfDPP[i][j][k] = 2;
4213 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4214 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4216 v->MPCCombine[i][j][k] = false;
4217 v->NoOfDPP[i][j][k] = 1;
4218 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4219 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4221 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4222 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4223 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4224 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4226 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4228 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4229 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4230 > v->MaxDppclkRoundedDownToDFSGranularity)
4231 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4232 v->DISPCLK_DPPCLK_Support[i][j] = false;
4235 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4236 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4237 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4240 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4241 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4242 v->DISPCLK_DPPCLK_Support[i][j] = false;
4247 /*Total Available Pipes Support Check*/
4249 for (i = 0; i < v->soc.num_states; i++) {
4250 for (j = 0; j < 2; j++) {
4251 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4252 v->TotalAvailablePipesSupport[i][j] = true;
4254 v->TotalAvailablePipesSupport[i][j] = false;
4258 /*Display IO and DSC Support Check*/
4260 v->NonsupportedDSCInputBPC = false;
4261 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4262 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4263 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4264 v->NonsupportedDSCInputBPC = true;
4268 /*Number Of DSC Slices*/
4269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4270 if (v->BlendingAndTiming[k] == k) {
4271 if (v->PixelClockBackEnd[k] > 3200) {
4272 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4273 } else if (v->PixelClockBackEnd[k] > 1360) {
4274 v->NumberOfDSCSlices[k] = 8;
4275 } else if (v->PixelClockBackEnd[k] > 680) {
4276 v->NumberOfDSCSlices[k] = 4;
4277 } else if (v->PixelClockBackEnd[k] > 340) {
4278 v->NumberOfDSCSlices[k] = 2;
4280 v->NumberOfDSCSlices[k] = 1;
4283 v->NumberOfDSCSlices[k] = 0;
4287 for (i = 0; i < v->soc.num_states; i++) {
4288 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4289 v->RequiresDSC[i][k] = false;
4290 v->RequiresFEC[i][k] = false;
4291 if (v->BlendingAndTiming[k] == k) {
4292 if (v->Output[k] == dm_hdmi) {
4293 v->RequiresDSC[i][k] = false;
4294 v->RequiresFEC[i][k] = false;
4295 v->OutputBppPerState[i][k] = TruncToValidBPP(
4296 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4300 v->PixelClockBackEnd[k],
4301 v->ForcedOutputLinkBPP[k],
4305 v->DSCInputBitPerComponent[k],
4306 v->NumberOfDSCSlices[k],
4307 v->AudioSampleRate[k],
4308 v->AudioSampleLayout[k],
4309 v->ODMCombineEnablePerState[i][k]);
4310 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4311 if (v->DSCEnable[k] == true) {
4312 v->RequiresDSC[i][k] = true;
4313 v->LinkDSCEnable = true;
4314 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4315 v->RequiresFEC[i][k] = true;
4317 v->RequiresFEC[i][k] = false;
4320 v->RequiresDSC[i][k] = false;
4321 v->LinkDSCEnable = false;
4322 if (v->Output[k] == dm_dp2p0) {
4323 v->RequiresFEC[i][k] = true;
4325 v->RequiresFEC[i][k] = false;
4328 if (v->Output[k] == dm_dp2p0) {
4329 v->Outbpp = BPP_INVALID;
4330 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4331 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4332 v->Outbpp = TruncToValidBPP(
4333 (1.0 - v->Downspreading / 100.0) * 10000,
4334 v->OutputLinkDPLanes[k],
4337 v->PixelClockBackEnd[k],
4338 v->ForcedOutputLinkBPP[k],
4342 v->DSCInputBitPerComponent[k],
4343 v->NumberOfDSCSlices[k],
4344 v->AudioSampleRate[k],
4345 v->AudioSampleLayout[k],
4346 v->ODMCombineEnablePerState[i][k]);
4347 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4348 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4349 v->RequiresDSC[i][k] = true;
4350 v->LinkDSCEnable = true;
4351 v->Outbpp = TruncToValidBPP(
4352 (1.0 - v->Downspreading / 100.0) * 10000,
4353 v->OutputLinkDPLanes[k],
4356 v->PixelClockBackEnd[k],
4357 v->ForcedOutputLinkBPP[k],
4361 v->DSCInputBitPerComponent[k],
4362 v->NumberOfDSCSlices[k],
4363 v->AudioSampleRate[k],
4364 v->AudioSampleLayout[k],
4365 v->ODMCombineEnablePerState[i][k]);
4367 v->OutputBppPerState[i][k] = v->Outbpp;
4368 // TODO: Need some other way to handle this nonsense
4369 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4371 if (v->Outbpp == BPP_INVALID &&
4372 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4373 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4374 v->Outbpp = TruncToValidBPP(
4375 (1.0 - v->Downspreading / 100.0) * 13500,
4376 v->OutputLinkDPLanes[k],
4379 v->PixelClockBackEnd[k],
4380 v->ForcedOutputLinkBPP[k],
4384 v->DSCInputBitPerComponent[k],
4385 v->NumberOfDSCSlices[k],
4386 v->AudioSampleRate[k],
4387 v->AudioSampleLayout[k],
4388 v->ODMCombineEnablePerState[i][k]);
4389 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4390 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4391 v->RequiresDSC[i][k] = true;
4392 v->LinkDSCEnable = true;
4393 v->Outbpp = TruncToValidBPP(
4394 (1.0 - v->Downspreading / 100.0) * 13500,
4395 v->OutputLinkDPLanes[k],
4398 v->PixelClockBackEnd[k],
4399 v->ForcedOutputLinkBPP[k],
4403 v->DSCInputBitPerComponent[k],
4404 v->NumberOfDSCSlices[k],
4405 v->AudioSampleRate[k],
4406 v->AudioSampleLayout[k],
4407 v->ODMCombineEnablePerState[i][k]);
4409 v->OutputBppPerState[i][k] = v->Outbpp;
4410 // TODO: Need some other way to handle this nonsense
4411 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4413 if (v->Outbpp == BPP_INVALID &&
4414 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4415 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4416 v->Outbpp = TruncToValidBPP(
4417 (1.0 - v->Downspreading / 100.0) * 20000,
4418 v->OutputLinkDPLanes[k],
4421 v->PixelClockBackEnd[k],
4422 v->ForcedOutputLinkBPP[k],
4426 v->DSCInputBitPerComponent[k],
4427 v->NumberOfDSCSlices[k],
4428 v->AudioSampleRate[k],
4429 v->AudioSampleLayout[k],
4430 v->ODMCombineEnablePerState[i][k]);
4431 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4432 v->ForcedOutputLinkBPP[k] == 0) {
4433 v->RequiresDSC[i][k] = true;
4434 v->LinkDSCEnable = true;
4435 v->Outbpp = TruncToValidBPP(
4436 (1.0 - v->Downspreading / 100.0) * 20000,
4437 v->OutputLinkDPLanes[k],
4440 v->PixelClockBackEnd[k],
4441 v->ForcedOutputLinkBPP[k],
4445 v->DSCInputBitPerComponent[k],
4446 v->NumberOfDSCSlices[k],
4447 v->AudioSampleRate[k],
4448 v->AudioSampleLayout[k],
4449 v->ODMCombineEnablePerState[i][k]);
4451 v->OutputBppPerState[i][k] = v->Outbpp;
4452 // TODO: Need some other way to handle this nonsense
4453 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4456 v->Outbpp = BPP_INVALID;
4457 if (v->PHYCLKPerState[i] >= 270.0) {
4458 v->Outbpp = TruncToValidBPP(
4459 (1.0 - v->Downspreading / 100.0) * 2700,
4460 v->OutputLinkDPLanes[k],
4463 v->PixelClockBackEnd[k],
4464 v->ForcedOutputLinkBPP[k],
4468 v->DSCInputBitPerComponent[k],
4469 v->NumberOfDSCSlices[k],
4470 v->AudioSampleRate[k],
4471 v->AudioSampleLayout[k],
4472 v->ODMCombineEnablePerState[i][k]);
4473 v->OutputBppPerState[i][k] = v->Outbpp;
4474 // TODO: Need some other way to handle this nonsense
4475 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4477 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4478 v->Outbpp = TruncToValidBPP(
4479 (1.0 - v->Downspreading / 100.0) * 5400,
4480 v->OutputLinkDPLanes[k],
4483 v->PixelClockBackEnd[k],
4484 v->ForcedOutputLinkBPP[k],
4488 v->DSCInputBitPerComponent[k],
4489 v->NumberOfDSCSlices[k],
4490 v->AudioSampleRate[k],
4491 v->AudioSampleLayout[k],
4492 v->ODMCombineEnablePerState[i][k]);
4493 v->OutputBppPerState[i][k] = v->Outbpp;
4494 // TODO: Need some other way to handle this nonsense
4495 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4497 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4498 v->Outbpp = TruncToValidBPP(
4499 (1.0 - v->Downspreading / 100.0) * 8100,
4500 v->OutputLinkDPLanes[k],
4503 v->PixelClockBackEnd[k],
4504 v->ForcedOutputLinkBPP[k],
4508 v->DSCInputBitPerComponent[k],
4509 v->NumberOfDSCSlices[k],
4510 v->AudioSampleRate[k],
4511 v->AudioSampleLayout[k],
4512 v->ODMCombineEnablePerState[i][k]);
4513 v->OutputBppPerState[i][k] = v->Outbpp;
4514 // TODO: Need some other way to handle this nonsense
4515 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4520 v->OutputBppPerState[i][k] = 0;
4525 for (i = 0; i < v->soc.num_states; i++) {
4526 v->LinkCapacitySupport[i] = true;
4527 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4528 if (v->BlendingAndTiming[k] == k
4529 && (v->Output[k] == dm_dp ||
4530 v->Output[k] == dm_edp ||
4531 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4532 v->LinkCapacitySupport[i] = false;
4538 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4539 if (v->BlendingAndTiming[k] == k
4540 && (v->Output[k] == dm_dp ||
4541 v->Output[k] == dm_edp ||
4542 v->Output[k] == dm_hdmi)) {
4543 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4546 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4547 && !v->DSC422NativeSupport) {
4548 DSC422NativeNotSupported = true;
4553 for (i = 0; i < v->soc.num_states; ++i) {
4554 v->ODMCombine4To1SupportCheckOK[i] = true;
4555 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4556 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4557 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4558 || v->Output[k] == dm_hdmi)) {
4559 v->ODMCombine4To1SupportCheckOK[i] = false;
4564 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4566 for (i = 0; i < v->soc.num_states; i++) {
4567 v->NotEnoughDSCUnits[i] = false;
4568 v->TotalDSCUnitsRequired = 0.0;
4569 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4570 if (v->RequiresDSC[i][k] == true) {
4571 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4572 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4573 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4574 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4576 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4580 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4581 v->NotEnoughDSCUnits[i] = true;
4584 /*DSC Delay per state*/
4586 for (i = 0; i < v->soc.num_states; i++) {
4587 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4588 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4591 v->BPP = v->OutputBppPerState[i][k];
4593 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4594 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4595 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4596 v->DSCInputBitPerComponent[k],
4598 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4599 v->NumberOfDSCSlices[k],
4601 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4602 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4603 v->DSCDelayPerState[i][k] = 2.0
4604 * (dscceComputeDelay(
4605 v->DSCInputBitPerComponent[k],
4607 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4608 v->NumberOfDSCSlices[k] / 2,
4610 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4612 v->DSCDelayPerState[i][k] = 4.0
4613 * (dscceComputeDelay(
4614 v->DSCInputBitPerComponent[k],
4616 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4617 v->NumberOfDSCSlices[k] / 4,
4619 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4621 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4623 v->DSCDelayPerState[i][k] = 0.0;
4626 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4627 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4628 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4629 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4635 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4637 for (i = 0; i < v->soc.num_states; ++i) {
4638 for (j = 0; j <= 1; ++j) {
4639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4640 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4641 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4642 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4645 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315)
4646 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]);
4647 CalculateSwathAndDETConfiguration(
4649 v->NumberOfActivePlanes,
4650 v->DETBufferSizeInKByte[0],
4651 v->MaximumSwathWidthLuma,
4652 v->MaximumSwathWidthChroma,
4654 v->SourcePixelFormat,
4662 v->Read256BlockHeightY,
4663 v->Read256BlockHeightC,
4664 v->Read256BlockWidthY,
4665 v->Read256BlockWidthC,
4666 v->ODMCombineEnableThisState,
4667 v->BlendingAndTiming,
4670 v->BytePerPixelInDETY,
4671 v->BytePerPixelInDETC,
4675 v->NoOfDPPThisState,
4676 v->swath_width_luma_ub_this_state,
4677 v->swath_width_chroma_ub_this_state,
4678 v->SwathWidthYThisState,
4679 v->SwathWidthCThisState,
4680 v->SwathHeightYThisState,
4681 v->SwathHeightCThisState,
4682 v->DETBufferSizeYThisState,
4683 v->DETBufferSizeCThisState,
4685 &v->ViewportSizeSupport[i][j]);
4687 CalculateDCFCLKDeepSleep(
4689 v->NumberOfActivePlanes,
4694 v->SwathWidthYThisState,
4695 v->SwathWidthCThisState,
4696 v->NoOfDPPThisState,
4701 v->PSCL_FACTOR_CHROMA,
4702 v->RequiredDPPCLKThisState,
4703 v->ReadBandwidthLuma,
4704 v->ReadBandwidthChroma,
4706 &v->ProjectedDCFCLKDeepSleep[i][j]);
4708 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4709 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4710 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4711 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4712 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4713 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4714 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4715 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4716 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4721 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4722 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4723 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4726 for (i = 0; i < v->soc.num_states; i++) {
4727 for (j = 0; j < 2; j++) {
4728 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4730 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4731 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4732 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4733 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4734 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4735 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4736 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4737 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4738 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4741 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4743 if (v->DCCEnable[k] == true) {
4744 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4748 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4749 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4750 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4752 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4753 && v->SourceScan[k] != dm_vert) {
4754 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4756 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4758 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4759 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4762 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4765 v->Read256BlockHeightC[k],
4766 v->Read256BlockWidthC[k],
4767 v->SourcePixelFormat[k],
4768 v->SurfaceTiling[k],
4769 v->BytePerPixelC[k],
4771 v->SwathWidthCThisState[k],
4772 v->ViewportHeightChroma[k],
4775 v->HostVMMaxNonCachedPageTableLevels,
4776 v->GPUVMMinPageSize,
4777 v->HostVMMinPageSize,
4778 v->PTEBufferSizeInRequestsForChroma,
4781 &v->MacroTileWidthC[k],
4783 &v->DPTEBytesPerRowC,
4784 &v->PTEBufferSizeNotExceededC[i][j][k],
4786 &v->dpte_row_height_chroma[k],
4790 &v->meta_row_height_chroma[k],
4797 &v->dummyinteger11);
4799 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4804 v->ProgressiveToInterlaceUnitInOPP,
4805 v->SwathHeightCThisState[k],
4806 v->ViewportYStartC[k],
4810 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4811 v->PTEBufferSizeInRequestsForChroma = 0;
4812 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4813 v->MetaRowBytesC = 0.0;
4814 v->DPTEBytesPerRowC = 0.0;
4815 v->PrefetchLinesC[i][j][k] = 0.0;
4816 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4818 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4821 v->Read256BlockHeightY[k],
4822 v->Read256BlockWidthY[k],
4823 v->SourcePixelFormat[k],
4824 v->SurfaceTiling[k],
4825 v->BytePerPixelY[k],
4827 v->SwathWidthYThisState[k],
4828 v->ViewportHeight[k],
4831 v->HostVMMaxNonCachedPageTableLevels,
4832 v->GPUVMMinPageSize,
4833 v->HostVMMinPageSize,
4834 v->PTEBufferSizeInRequestsForLuma,
4836 v->DCCMetaPitchY[k],
4837 &v->MacroTileWidthY[k],
4839 &v->DPTEBytesPerRowY,
4840 &v->PTEBufferSizeNotExceededY[i][j][k],
4842 &v->dpte_row_height[k],
4846 &v->meta_row_height[k],
4848 &v->dpte_group_bytes[k],
4854 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4859 v->ProgressiveToInterlaceUnitInOPP,
4860 v->SwathHeightYThisState[k],
4861 v->ViewportYStartY[k],
4864 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4865 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4866 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4868 CalculateRowBandwidth(
4870 v->SourcePixelFormat[k],
4874 v->HTotal[k] / v->PixelClock[k],
4877 v->meta_row_height[k],
4878 v->meta_row_height_chroma[k],
4879 v->DPTEBytesPerRowY,
4880 v->DPTEBytesPerRowC,
4881 v->dpte_row_height[k],
4882 v->dpte_row_height_chroma[k],
4883 &v->meta_row_bandwidth[i][j][k],
4884 &v->dpte_row_bandwidth[i][j][k]);
4886 /*DCCMetaBufferSizeSupport(i, j) = True
4887 For k = 0 To NumberOfActivePlanes - 1
4888 If MetaRowBytes(i, j, k) > 24064 Then
4889 DCCMetaBufferSizeSupport(i, j) = False
4892 v->DCCMetaBufferSizeSupport[i][j] = true;
4893 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4894 if (v->MetaRowBytes[i][j][k] > 24064)
4895 v->DCCMetaBufferSizeSupport[i][j] = false;
4897 v->UrgLatency[i] = CalculateUrgentLatency(
4898 v->UrgentLatencyPixelDataOnly,
4899 v->UrgentLatencyPixelMixedWithVMData,
4900 v->UrgentLatencyVMDataOnly,
4901 v->DoUrgentLatencyAdjustment,
4902 v->UrgentLatencyAdjustmentFabricClockComponent,
4903 v->UrgentLatencyAdjustmentFabricClockReference,
4904 v->FabricClockPerState[i]);
4906 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4907 CalculateUrgentBurstFactor(
4908 v->swath_width_luma_ub_this_state[k],
4909 v->swath_width_chroma_ub_this_state[k],
4910 v->SwathHeightYThisState[k],
4911 v->SwathHeightCThisState[k],
4912 v->HTotal[k] / v->PixelClock[k],
4914 v->CursorBufferSize,
4915 v->CursorWidth[k][0],
4919 v->BytePerPixelInDETY[k],
4920 v->BytePerPixelInDETC[k],
4921 v->DETBufferSizeYThisState[k],
4922 v->DETBufferSizeCThisState[k],
4923 &v->UrgentBurstFactorCursor[k],
4924 &v->UrgentBurstFactorLuma[k],
4925 &v->UrgentBurstFactorChroma[k],
4926 &NotUrgentLatencyHiding[k]);
4929 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4930 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4931 if (NotUrgentLatencyHiding[k]) {
4932 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4936 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4937 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4938 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4939 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4942 v->TotalVActivePixelBandwidth[i][j] = 0;
4943 v->TotalVActiveCursorBandwidth[i][j] = 0;
4944 v->TotalMetaRowBandwidth[i][j] = 0;
4945 v->TotalDPTERowBandwidth[i][j] = 0;
4946 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4947 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4948 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4949 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4950 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4955 //Calculate Writeback Delay Time and Maximum VStartup (Return BW per state is calculated further below)
4956 for (i = 0; i < v->soc.num_states; ++i) {
4957 for (j = 0; j <= 1; ++j) {
4958 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4959 if (v->BlendingAndTiming[k] == k) {
4960 if (v->WritebackEnable[k] == true) {
4961 v->WritebackDelayTime[k] = v->WritebackLatency
4962 + CalculateWriteBackDelay(
4963 v->WritebackPixelFormat[k],
4964 v->WritebackHRatio[k],
4965 v->WritebackVRatio[k],
4966 v->WritebackVTaps[k],
4967 v->WritebackDestinationWidth[k],
4968 v->WritebackDestinationHeight[k],
4969 v->WritebackSourceHeight[k],
4970 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4972 v->WritebackDelayTime[k] = 0.0;
4974 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4975 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4976 v->WritebackDelayTime[k] = dml_max(
4977 v->WritebackDelayTime[k],
4979 + CalculateWriteBackDelay(
4980 v->WritebackPixelFormat[m],
4981 v->WritebackHRatio[m],
4982 v->WritebackVRatio[m],
4983 v->WritebackVTaps[m],
4984 v->WritebackDestinationWidth[m],
4985 v->WritebackDestinationHeight[m],
4986 v->WritebackSourceHeight[m],
4987 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4992 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4993 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4994 if (v->BlendingAndTiming[k] == m) {
4995 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4999 v->MaxMaxVStartup[i][j] = 0;
5000 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5001 v->MaximumVStartup[i][j][k] =
5002 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5003 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5004 v->VTotal[k] - v->VActive[k]
5008 1.0 * v->WritebackDelayTime[k]
5010 / v->PixelClock[k]),
5012 if (v->MaximumVStartup[i][j][k] > 1023)
5013 v->MaximumVStartup[i][j][k] = 1023;
5014 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5019 ReorderingBytes = v->NumberOfChannels
5021 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5022 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5023 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5025 for (i = 0; i < v->soc.num_states; ++i) {
5026 for (j = 0; j <= 1; ++j) {
5027 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5031 if (v->UseMinimumRequiredDCFCLK == true)
5032 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5034 for (i = 0; i < v->soc.num_states; ++i) {
5035 for (j = 0; j <= 1; ++j) {
5036 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5037 v->ReturnBusWidth * v->DCFCLKState[i][j],
5038 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5039 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5040 double PixelDataOnlyReturnBWPerState = dml_min(
5041 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5042 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5043 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5044 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5045 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5047 if (v->HostVMEnable != true) {
5048 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5050 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5055 //Re-ordering Buffer Support Check
5056 for (i = 0; i < v->soc.num_states; ++i) {
5057 for (j = 0; j <= 1; ++j) {
5058 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5059 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5060 v->ROBSupport[i][j] = true;
5062 v->ROBSupport[i][j] = false;
5067 //Vertical Active BW support check
5069 MaxTotalVActiveRDBandwidth = 0;
5070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5071 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5074 for (i = 0; i < v->soc.num_states; ++i) {
5075 for (j = 0; j <= 1; ++j) {
5076 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5078 v->ReturnBusWidth * v->DCFCLKState[i][j],
5079 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5080 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5081 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5082 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5084 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5085 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5087 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5092 v->UrgentLatency = CalculateUrgentLatency(
5093 v->UrgentLatencyPixelDataOnly,
5094 v->UrgentLatencyPixelMixedWithVMData,
5095 v->UrgentLatencyVMDataOnly,
5096 v->DoUrgentLatencyAdjustment,
5097 v->UrgentLatencyAdjustmentFabricClockComponent,
5098 v->UrgentLatencyAdjustmentFabricClockReference,
5101 for (i = 0; i < v->soc.num_states; ++i) {
5102 for (j = 0; j <= 1; ++j) {
5103 double VMDataOnlyReturnBWPerState;
5104 double HostVMInefficiencyFactor = 1;
5105 int NextPrefetchModeState = MinPrefetchMode;
5106 bool UnboundedRequestEnabledThisState = false;
5107 int CompressedBufferSizeInkByteThisState = 0;
5110 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5112 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5113 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5114 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5115 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5118 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5119 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5120 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5121 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5122 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5123 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5124 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5125 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5126 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5127 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5130 VMDataOnlyReturnBWPerState = dml_min(
5132 v->ReturnBusWidth * v->DCFCLKState[i][j],
5133 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5134 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5135 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5136 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5137 if (v->GPUVMEnable && v->HostVMEnable)
5138 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5140 v->ExtraLatency = CalculateExtraLatency(
5141 v->RoundTripPingLatencyCycles,
5143 v->DCFCLKState[i][j],
5144 v->TotalNumberOfActiveDPP[i][j],
5145 v->PixelChunkSizeInKByte,
5146 v->TotalNumberOfDCCActiveDPP[i][j],
5148 v->ReturnBWPerState[i][j],
5151 v->NumberOfActivePlanes,
5152 v->NoOfDPPThisState,
5153 v->dpte_group_bytes,
5154 HostVMInefficiencyFactor,
5155 v->HostVMMinPageSize,
5156 v->HostVMMaxNonCachedPageTableLevels);
5158 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5160 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5161 v->MaxVStartup = v->NextMaxVStartup;
5163 v->TWait = CalculateTWait(
5164 v->PrefetchModePerState[i][j],
5165 v->DRAMClockChangeLatency,
5167 v->SREnterPlusExitTime);
5169 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5170 CalculatePrefetchSchedulePerPlane(mode_lib,
5171 HostVMInefficiencyFactor,
5175 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5176 CalculateUrgentBurstFactor(
5177 v->swath_width_luma_ub_this_state[k],
5178 v->swath_width_chroma_ub_this_state[k],
5179 v->SwathHeightYThisState[k],
5180 v->SwathHeightCThisState[k],
5181 v->HTotal[k] / v->PixelClock[k],
5183 v->CursorBufferSize,
5184 v->CursorWidth[k][0],
5186 v->VRatioPreY[i][j][k],
5187 v->VRatioPreC[i][j][k],
5188 v->BytePerPixelInDETY[k],
5189 v->BytePerPixelInDETC[k],
5190 v->DETBufferSizeYThisState[k],
5191 v->DETBufferSizeCThisState[k],
5192 &v->UrgentBurstFactorCursorPre[k],
5193 &v->UrgentBurstFactorLumaPre[k],
5194 &v->UrgentBurstFactorChromaPre[k],
5195 &v->NotUrgentLatencyHidingPre[k]);
5198 v->MaximumReadBandwidthWithPrefetch = 0.0;
5199 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5200 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5201 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5203 v->MaximumReadBandwidthWithPrefetch =
5204 v->MaximumReadBandwidthWithPrefetch
5206 v->VActivePixelBandwidth[i][j][k]
5207 + v->VActiveCursorBandwidth[i][j][k]
5208 + v->NoOfDPP[i][j][k]
5209 * (v->meta_row_bandwidth[i][j][k]
5210 + v->dpte_row_bandwidth[i][j][k]),
5211 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5213 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5214 * v->UrgentBurstFactorLumaPre[k]
5215 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5216 * v->UrgentBurstFactorChromaPre[k])
5217 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5220 v->NotEnoughUrgentLatencyHidingPre = false;
5221 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5222 if (v->NotUrgentLatencyHidingPre[k] == true) {
5223 v->NotEnoughUrgentLatencyHidingPre = true;
5227 v->PrefetchSupported[i][j] = true;
5228 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5229 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5230 v->PrefetchSupported[i][j] = false;
5232 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5233 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5234 || v->NoTimeForPrefetch[i][j][k] == true) {
5235 v->PrefetchSupported[i][j] = false;
5239 v->DynamicMetadataSupported[i][j] = true;
5240 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5241 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5242 v->DynamicMetadataSupported[i][j] = false;
5246 v->VRatioInPrefetchSupported[i][j] = true;
5247 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5248 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5249 v->VRatioInPrefetchSupported[i][j] = false;
5252 v->AnyLinesForVMOrRowTooLarge = false;
5253 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5254 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5255 v->AnyLinesForVMOrRowTooLarge = true;
5259 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5261 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5262 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5263 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5264 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5266 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5268 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5269 * v->UrgentBurstFactorLumaPre[k]
5270 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5271 * v->UrgentBurstFactorChromaPre[k])
5272 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5274 v->TotImmediateFlipBytes = 0.0;
5275 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5276 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5277 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5278 + v->DPTEBytesPerRow[i][j][k];
5281 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5282 CalculateFlipSchedule(
5285 HostVMInefficiencyFactor,
5288 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5289 v->MetaRowBytes[i][j][k],
5290 v->DPTEBytesPerRow[i][j][k]);
5292 v->total_dcn_read_bw_with_flip = 0.0;
5293 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5294 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5296 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5297 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5298 + v->VActiveCursorBandwidth[i][j][k],
5300 * (v->final_flip_bw[k]
5301 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5302 * v->UrgentBurstFactorLumaPre[k]
5303 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5304 * v->UrgentBurstFactorChromaPre[k])
5305 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5307 v->ImmediateFlipSupportedForState[i][j] = true;
5308 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5309 v->ImmediateFlipSupportedForState[i][j] = false;
5311 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5312 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5313 v->ImmediateFlipSupportedForState[i][j] = false;
5317 v->ImmediateFlipSupportedForState[i][j] = false;
5320 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5321 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5322 NextPrefetchModeState = NextPrefetchModeState + 1;
5324 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5326 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5327 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5328 && ((v->HostVMEnable == false &&
5329 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5330 || v->ImmediateFlipSupportedForState[i][j] == true))
5331 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5333 CalculateUnboundedRequestAndCompressedBufferSize(
5334 v->DETBufferSizeInKByte[0],
5335 v->ConfigReturnBufferSizeInKByte,
5336 v->UseUnboundedRequesting,
5337 v->TotalNumberOfActiveDPP[i][j],
5340 v->CompressedBufferSegmentSizeInkByte,
5342 &UnboundedRequestEnabledThisState,
5343 &CompressedBufferSizeInkByteThisState);
5345 CalculateWatermarksAndDRAMSpeedChangeSupport(
5347 v->PrefetchModePerState[i][j],
5348 v->DCFCLKState[i][j],
5349 v->ReturnBWPerState[i][j],
5352 v->SOCCLKPerState[i],
5353 v->ProjectedDCFCLKDeepSleep[i][j],
5354 v->DETBufferSizeYThisState,
5355 v->DETBufferSizeCThisState,
5356 v->SwathHeightYThisState,
5357 v->SwathHeightCThisState,
5358 v->SwathWidthYThisState,
5359 v->SwathWidthCThisState,
5360 v->NoOfDPPThisState,
5361 v->BytePerPixelInDETY,
5362 v->BytePerPixelInDETC,
5363 UnboundedRequestEnabledThisState,
5364 CompressedBufferSizeInkByteThisState,
5365 &v->DRAMClockChangeSupport[i][j],
5373 /*PTE Buffer Size Check*/
5374 for (i = 0; i < v->soc.num_states; i++) {
5375 for (j = 0; j < 2; j++) {
5376 v->PTEBufferSizeNotExceeded[i][j] = true;
5377 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5378 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5379 v->PTEBufferSizeNotExceeded[i][j] = false;
5385 /*Cursor Support Check*/
5386 v->CursorSupport = true;
5387 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5388 if (v->CursorWidth[k][0] > 0.0) {
5389 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5390 v->CursorSupport = false;
5395 /*Valid Pitch Check*/
5396 v->PitchSupport = true;
5397 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5398 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5399 if (v->DCCEnable[k] == true) {
5400 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5402 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5404 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5405 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5406 && v->SourcePixelFormat[k] != dm_mono_8) {
5407 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5408 if (v->DCCEnable[k] == true) {
5409 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5410 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5411 64.0 * v->Read256BlockWidthC[k]);
5413 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5416 v->AlignedCPitch[k] = v->PitchC[k];
5417 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5419 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5420 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5421 v->PitchSupport = false;
5425 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5426 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5427 ViewportExceedsSurface = true;
5428 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5429 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5430 && v->SourcePixelFormat[k] != dm_rgbe) {
5431 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5432 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5433 ViewportExceedsSurface = true;
5439 /*Mode Support, Voltage State and SOC Configuration*/
5440 for (i = v->soc.num_states - 1; i >= 0; i--) {
5441 for (j = 0; j < 2; j++) {
5442 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5443 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5444 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5445 && v->DTBCLKRequiredMoreThanSupported[i] == false
5446 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5447 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5448 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5449 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5450 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5451 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5452 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5453 && ((v->HostVMEnable == false
5454 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5455 || v->ImmediateFlipSupportedForState[i][j] == true)
5456 && FMTBufferExceeded == false) {
5457 v->ModeSupport[i][j] = true;
5459 v->ModeSupport[i][j] = false;
5460 #ifdef __DML_VBA_DEBUG__
5461 if (v->ScaleRatioAndTapsSupport == false)
5462 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
5463 if (v->SourceFormatPixelAndScanSupport == false)
5464 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
5465 if (v->ViewportSizeSupport[i][j] == false)
5466 dml_print("DML SUPPORT: ViewportSizeSupport failed");
5467 if (v->LinkCapacitySupport[i] == false)
5468 dml_print("DML SUPPORT: LinkCapacitySupport failed");
5469 if (v->ODMCombine4To1SupportCheckOK[i] == false)
5470 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5471 if (v->NotEnoughDSCUnits[i] == true)
5472 dml_print("DML SUPPORT: NotEnoughDSCUnits");
5473 if (v->DTBCLKRequiredMoreThanSupported[i] == true)
5474 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
5475 if (v->ROBSupport[i][j] == false)
5476 dml_print("DML SUPPORT: ROBSupport failed");
5477 if (v->DISPCLK_DPPCLK_Support[i][j] == false)
5478 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
5479 if (v->TotalAvailablePipesSupport[i][j] == false)
5480 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5481 if (EnoughWritebackUnits == false)
5482 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5483 if (v->WritebackLatencySupport == false)
5484 dml_print("DML SUPPORT: WritebackLatencySupport failed");
5485 if (v->WritebackScaleRatioAndTapsSupport == false)
5486 dml_print("DML SUPPORT: DSC422NativeNotSupported ");
5487 if (v->CursorSupport == false)
5488 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5489 if (v->PitchSupport == false)
5490 dml_print("DML SUPPORT: PitchSupport failed");
5491 if (ViewportExceedsSurface == true)
5492 dml_print("DML SUPPORT: ViewportExceedsSurface failed");
5493 if (v->PrefetchSupported[i][j] == false)
5494 dml_print("DML SUPPORT: PrefetchSupported failed");
5495 if (v->DynamicMetadataSupported[i][j] == false)
5496 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5497 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
5498 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
5499 if (v->VRatioInPrefetchSupported[i][j] == false)
5500 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
5501 if (v->PTEBufferSizeNotExceeded[i][j] == false)
5502 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
5503 if (v->NonsupportedDSCInputBPC == true)
5504 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
5505 if (!((v->HostVMEnable == false
5506 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5507 || v->ImmediateFlipSupportedForState[i][j] == true))
5508 dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
5509 if (FMTBufferExceeded == true)
5510 dml_print("DML SUPPORT: FMTBufferExceeded failed");
5517 unsigned int MaximumMPCCombine = 0;
5518 for (i = v->soc.num_states; i >= 0; i--) {
5519 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5520 v->VoltageLevel = i;
5521 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5522 if (v->ModeSupport[i][0] == true) {
5523 MaximumMPCCombine = 0;
5525 MaximumMPCCombine = 1;
5529 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5530 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5531 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5532 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5534 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5535 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5536 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5537 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5538 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5539 v->maxMpcComb = MaximumMPCCombine;
5543 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5544 struct display_mode_lib *mode_lib,
5545 unsigned int PrefetchMode,
5548 double UrgentLatency,
5549 double ExtraLatency,
5551 double DCFCLKDeepSleep,
5552 unsigned int DETBufferSizeY[],
5553 unsigned int DETBufferSizeC[],
5554 unsigned int SwathHeightY[],
5555 unsigned int SwathHeightC[],
5556 double SwathWidthY[],
5557 double SwathWidthC[],
5558 unsigned int DPPPerPlane[],
5559 double BytePerPixelDETY[],
5560 double BytePerPixelDETC[],
5561 bool UnboundedRequestEnabled,
5562 int unsigned CompressedBufferSizeInkByte,
5563 enum clock_change_support *DRAMClockChangeSupport,
5564 double *StutterExitWatermark,
5565 double *StutterEnterPlusExitWatermark,
5566 double *Z8StutterExitWatermark,
5567 double *Z8StutterEnterPlusExitWatermark)
5569 struct vba_vars_st *v = &mode_lib->vba;
5570 double EffectiveLBLatencyHidingY;
5571 double EffectiveLBLatencyHidingC;
5572 double LinesInDETY[DC__NUM_DPP__MAX];
5574 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5575 unsigned int LinesInDETCRoundedDownToSwath;
5576 double FullDETBufferingTimeY;
5577 double FullDETBufferingTimeC;
5578 double ActiveDRAMClockChangeLatencyMarginY;
5579 double ActiveDRAMClockChangeLatencyMarginC;
5580 double WritebackDRAMClockChangeLatencyMargin;
5581 double PlaneWithMinActiveDRAMClockChangeMargin;
5582 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5583 double WritebackDRAMClockChangeLatencyHiding;
5584 double TotalPixelBW = 0.0;
5587 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5589 #ifdef __DML_VBA_DEBUG__
5590 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5591 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5592 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5595 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5597 #ifdef __DML_VBA_DEBUG__
5598 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5599 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5602 v->TotalActiveWriteback = 0;
5603 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5604 if (v->WritebackEnable[k] == true) {
5605 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5609 if (v->TotalActiveWriteback <= 1) {
5610 v->WritebackUrgentWatermark = v->WritebackLatency;
5612 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5615 if (v->TotalActiveWriteback <= 1) {
5616 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5618 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5621 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5622 TotalPixelBW = TotalPixelBW
5623 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5624 / (v->HTotal[k] / v->PixelClock[k]);
5627 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5628 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5630 v->LBLatencyHidingSourceLinesY = dml_min(
5631 (double) v->MaxLineBufferLines,
5632 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5634 v->LBLatencyHidingSourceLinesC = dml_min(
5635 (double) v->MaxLineBufferLines,
5636 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5638 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5640 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5642 if (UnboundedRequestEnabled) {
5643 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5644 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5647 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5648 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5649 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5650 if (BytePerPixelDETC[k] > 0) {
5651 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5652 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5653 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5656 FullDETBufferingTimeC = 999999;
5659 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5660 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5662 if (v->NumberOfActivePlanes > 1) {
5663 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5664 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5667 if (BytePerPixelDETC[k] > 0) {
5668 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5669 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5671 if (v->NumberOfActivePlanes > 1) {
5672 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5673 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5675 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5677 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5680 if (v->WritebackEnable[k] == true) {
5681 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5682 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5683 if (v->WritebackPixelFormat[k] == dm_444_64) {
5684 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5686 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5687 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5691 v->MinActiveDRAMClockChangeMargin = 999999;
5692 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5693 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5694 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5695 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5696 if (v->BlendingAndTiming[k] == k) {
5697 PlaneWithMinActiveDRAMClockChangeMargin = k;
5699 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5700 if (v->BlendingAndTiming[k] == j) {
5701 PlaneWithMinActiveDRAMClockChangeMargin = j;
5708 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5710 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5711 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5712 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5713 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5714 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5718 v->TotalNumberOfActiveOTG = 0;
5720 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5721 if (v->BlendingAndTiming[k] == k) {
5722 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5726 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5727 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5728 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5729 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5730 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5732 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5735 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5736 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5737 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5738 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5740 #ifdef __DML_VBA_DEBUG__
5741 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5742 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5743 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5744 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep - compute the value stored in *DCFCLKDeepSleep.
 *
 * For every plane k < NumberOfActivePlanes a per-plane value is written to
 * v->DCFCLKDeepSleepPerPlane[k]. The final result is the largest of:
 *   - 8.0,
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of luma + chroma read bandwidth) / ReturnBusWidth,
 *   - every per-plane value.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK, ReturnBusWidth and the loop index k
 * are used below but their declarations are not visible in this view of the
 * file; the separators between paired alternative assignments are not visible
 * either (presumably "else" branches) - confirm against the full source.
 */
5748 static void CalculateDCFCLKDeepSleep(
5749 struct display_mode_lib *mode_lib,
5750 unsigned int NumberOfActivePlanes,
5751 int BytePerPixelY[],
5752 int BytePerPixelC[],
5754 double VRatioChroma[],
5755 double SwathWidthY[],
5756 double SwathWidthC[],
5757 unsigned int DPPPerPlane[],
5759 double HRatioChroma[],
5760 double PixelClock[],
5761 double PSCL_THROUGHPUT[],
5762 double PSCL_THROUGHPUT_CHROMA[],
5764 double ReadBandwidthLuma[],
5765 double ReadBandwidthChroma[],
5767 double *DCFCLKDeepSleep)
5769 struct vba_vars_st *v = &mode_lib->vba;
5770 double DisplayPipeLineDeliveryTimeLuma;
5771 double DisplayPipeLineDeliveryTimeChroma;
5772 double ReadBandwidth = 0.0;
/* Pass 1: per-plane value derived from the line delivery times. */
5775 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma line delivery time. The first form (swath width * DPPs / HRatio /
 * PixelClock) is guarded by VRatio[k] <= 1; the second form uses the PSCL
 * throughput and DPPCLK instead.
 */
5777 if (VRatio[k] <= 1) {
5778 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5780 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: 0 when the plane has no chroma bytes, otherwise the same two forms as luma. */
5782 if (BytePerPixelC[k] == 0) {
5783 DisplayPipeLineDeliveryTimeChroma = 0;
5785 if (VRatioChroma[k] <= 1) {
5786 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5788 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * With chroma present the per-plane value is the larger of the luma and
 * chroma terms, each using a divisor of 32.0; the luma-only form further
 * below uses a divisor of 64.0.
 */
5792 if (BytePerPixelC[k] > 0) {
5793 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5794 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5796 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* The per-plane value is never allowed below PixelClock[k] / 16. */
5798 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Pass 2: accumulate luma + chroma read bandwidth over all planes. */
5802 for (k = 0; k < NumberOfActivePlanes; ++k) {
5803 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Bandwidth-based starting value with a lower bound of 8.0. */
5806 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
/* Pass 3: raise the result to the largest per-plane value. */
5808 for (k = 0; k < NumberOfActivePlanes; ++k) {
5809 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor - compute the cursor, luma and chroma urgent
 * burst factors for one plane.
 *
 * For each of the three buffers a "buffer size in time" is derived and then:
 *   - if that time minus UrgentLatency is <= 0, the factor is set to 0 and
 *     *NotEnoughUrgentLatencyHiding is set to 1;
 *   - otherwise the factor is time / (time - UrgentLatency).
 * *NotEnoughUrgentLatencyHiding is cleared to 0 on entry.
 *
 * NOTE(review): LineTime and VRatio are used below but their declarations are
 * not visible in this view of the file. Each section also ends with a
 * "factor = 1" assignment whose guarding condition is not visible here
 * (presumably a fallback branch) - confirm against the full source.
 */
5813 static void CalculateUrgentBurstFactor(
5814 int swath_width_luma_ub,
5815 int swath_width_chroma_ub,
5816 unsigned int SwathHeightY,
5817 unsigned int SwathHeightC,
5819 double UrgentLatency,
5820 double CursorBufferSize,
5821 unsigned int CursorWidth,
5822 unsigned int CursorBPP,
5825 double BytePerPixelInDETY,
5826 double BytePerPixelInDETC,
5827 double DETBufferSizeY,
5828 double DETBufferSizeC,
5829 double *UrgentBurstFactorCursor,
5830 double *UrgentBurstFactorLuma,
5831 double *UrgentBurstFactorChroma,
5832 bool *NotEnoughUrgentLatencyHiding)
5834 double LinesInDETLuma;
5835 double LinesInDETChroma;
5836 unsigned int LinesInCursorBuffer;
5837 double CursorBufferSizeInTime;
5838 double DETBufferSizeInTimeLuma;
5839 double DETBufferSizeInTimeChroma;
5841 *NotEnoughUrgentLatencyHiding = 0;
/*
 * Cursor: only evaluated when CursorWidth > 0. The line count is
 * 1 << floor(log2(CursorBufferSize * 1024 / bytes per cursor line)), where
 * bytes per cursor line is CursorWidth * CursorBPP / 8.0.
 */
5843 if (CursorWidth > 0) {
5844 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5846 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5847 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5848 *NotEnoughUrgentLatencyHiding = 1;
5849 *UrgentBurstFactorCursor = 0;
5851 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
5854 *UrgentBurstFactorCursor = 1;
/*
 * Luma: lines held in the DET buffer, passed through
 * dml_floor(..., SwathHeightY) and converted to time with LineTime / VRatio.
 */
5858 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5860 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5861 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5862 *NotEnoughUrgentLatencyHiding = 1;
5863 *UrgentBurstFactorLuma = 0;
5865 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5868 *UrgentBurstFactorLuma = 1;
/*
 * Chroma: only evaluated when BytePerPixelInDETC > 0. Note that the chroma
 * buffer time is divided by VRatio, the same ratio used for luma above.
 */
5871 if (BytePerPixelInDETC > 0) {
5872 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
5874 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5875 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5876 *NotEnoughUrgentLatencyHiding = 1;
5877 *UrgentBurstFactorChroma = 0;
5879 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5882 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes - fill the per-plane delivery time arrays.
 *
 * Three passes over the NumberOfActivePlanes planes:
 *   1. line delivery times for luma and chroma, in a normal variant (selected
 *      by VRatio / VRatioChroma) and a prefetch variant (selected by
 *      VRatioPrefetchY / VRatioPrefetchC);
 *   2. request delivery times = line delivery time / requests per swath;
 *   3. cursor request delivery times, using cursor index 0 of each plane only.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK and the loop index k are used below
 * but their declarations are not visible in this view of the file; the
 * separators between paired alternative assignments are not visible either
 * (presumably "else" branches) - confirm against the full source.
 */
5887 static void CalculatePixelDeliveryTimes(
5888 unsigned int NumberOfActivePlanes,
5890 double VRatioChroma[],
5891 double VRatioPrefetchY[],
5892 double VRatioPrefetchC[],
5893 unsigned int swath_width_luma_ub[],
5894 unsigned int swath_width_chroma_ub[],
5895 unsigned int DPPPerPlane[],
5897 double HRatioChroma[],
5898 double PixelClock[],
5899 double PSCL_THROUGHPUT[],
5900 double PSCL_THROUGHPUT_CHROMA[],
5902 int BytePerPixelC[],
5903 enum scan_direction_class SourceScan[],
5904 unsigned int NumberOfCursors[],
5905 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5906 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5907 unsigned int BlockWidth256BytesY[],
5908 unsigned int BlockHeight256BytesY[],
5909 unsigned int BlockWidth256BytesC[],
5910 unsigned int BlockHeight256BytesC[],
5911 double DisplayPipeLineDeliveryTimeLuma[],
5912 double DisplayPipeLineDeliveryTimeChroma[],
5913 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5914 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5915 double DisplayPipeRequestDeliveryTimeLuma[],
5916 double DisplayPipeRequestDeliveryTimeChroma[],
5917 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5918 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5919 double CursorRequestDeliveryTime[],
5920 double CursorRequestDeliveryTimePrefetch[])
5922 double req_per_swath_ub;
/* Pass 1: line delivery times (normal and prefetch). */
5925 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Luma: the ratio <= 1 form uses HRatio and PixelClock; the other form uses PSCL throughput and DPPCLK. */
5926 if (VRatio[k] <= 1) {
5927 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5929 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma: 0 when the plane has no chroma bytes. */
5932 if (BytePerPixelC[k] == 0) {
5933 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5935 if (VRatioChroma[k] <= 1) {
5936 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5938 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Prefetch variants: same formulas, but selected by the prefetch ratios. */
5942 if (VRatioPrefetchY[k] <= 1) {
5943 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5945 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5948 if (BytePerPixelC[k] == 0) {
5949 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5951 if (VRatioPrefetchC[k] <= 1) {
5952 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5954 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Pass 2: request delivery times. */
5959 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Requests per swath: swath width divided by the 256-byte block width when
 * SourceScan[k] != dm_vert, or by the 256-byte block height in the other form.
 * Both operands are unsigned int, so the division is an integer division
 * whose result is then stored in a double.
 */
5960 if (SourceScan[k] != dm_vert) {
5961 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5963 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5965 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5966 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
/* Chroma request times: 0 without chroma, otherwise computed like luma with the chroma block dimensions. */
5967 if (BytePerPixelC[k] == 0) {
5968 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5969 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5971 if (SourceScan[k] != dm_vert) {
5972 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5974 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5976 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5977 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
/* Debug-only dump of the ratios and the delivery times computed for plane k. */
5979 #ifdef __DML_VBA_DEBUG__
5980 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5981 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5982 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5983 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5984 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5985 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5986 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5987 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5988 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5989 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5990 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5991 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/* Pass 3: cursor request delivery times (cursor index 0 of each plane). */
5995 for (k = 0; k < NumberOfActivePlanes; ++k) {
5996 int cursor_req_per_width;
/*
 * NOTE(review): CursorWidth and CursorBPP are unsigned int, so
 * "width * bpp / 256 / 8" is evaluated with integer division before dml_ceil
 * sees it. Any product below 2048 therefore yields 0, and the value is used
 * as a divisor below. Confirm whether floating-point division was intended.
 */
5997 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
/* Planes without a cursor get 0 for both outputs (assignments at the end of this loop body). */
5998 if (NumberOfCursors[k] > 0) {
5999 if (VRatio[k] <= 1) {
6000 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6002 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6004 if (VRatioPrefetchY[k] <= 1) {
6005 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6007 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6010 CursorRequestDeliveryTime[k] = 0;
6011 CursorRequestDeliveryTimePrefetch[k] = 0;
/* Debug-only dump of the cursor results for plane k. */
6013 #ifdef __DML_VBA_DEBUG__
6014 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6015 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6016 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes - fill the per-plane meta-row / PTE-row timing
 * output arrays.
 *
 * Three passes over the NumberOfActivePlanes planes:
 *   1. DST_Y_PER_PTE_ROW_NOM_* and DST_Y_PER_META_ROW_NOM_*: row height
 *      divided by the vertical ratio (chroma entries are 0 when
 *      BytePerPixelC[k] == 0);
 *   2. time per meta chunk (nominal / vblank / flip) when DCCEnable[k] is
 *      true, with all six outputs zeroed in the remaining case;
 *   3. time per PTE group (nominal / vblank / flip) when GPUVMEnable is true,
 *      with all six outputs zeroed in the remaining case.
 *
 * NOTE(review): GPUVMEnable, MetaChunkSize, DCCEnable, VRatio, HTotal and the
 * loop index k are used below but their declarations are not visible in this
 * view of the file; the separators between paired alternative assignments
 * are not visible either (presumably "else" branches) - confirm against the
 * full source.
 */
6021 static void CalculateMetaAndPTETimes(
6022 int NumberOfActivePlanes,
6025 int MinMetaChunkSizeBytes,
6028 double VRatioChroma[],
6029 double DestinationLinesToRequestRowInVBlank[],
6030 double DestinationLinesToRequestRowInImmediateFlip[],
6032 double PixelClock[],
6033 int BytePerPixelY[],
6034 int BytePerPixelC[],
6035 enum scan_direction_class SourceScan[],
6036 int dpte_row_height[],
6037 int dpte_row_height_chroma[],
6038 int meta_row_width[],
6039 int meta_row_width_chroma[],
6040 int meta_row_height[],
6041 int meta_row_height_chroma[],
6042 int meta_req_width[],
6043 int meta_req_width_chroma[],
6044 int meta_req_height[],
6045 int meta_req_height_chroma[],
6046 int dpte_group_bytes[],
6047 int PTERequestSizeY[],
6048 int PTERequestSizeC[],
6049 int PixelPTEReqWidthY[],
6050 int PixelPTEReqHeightY[],
6051 int PixelPTEReqWidthC[],
6052 int PixelPTEReqHeightC[],
6053 int dpte_row_width_luma_ub[],
6054 int dpte_row_width_chroma_ub[],
6055 double DST_Y_PER_PTE_ROW_NOM_L[],
6056 double DST_Y_PER_PTE_ROW_NOM_C[],
6057 double DST_Y_PER_META_ROW_NOM_L[],
6058 double DST_Y_PER_META_ROW_NOM_C[],
6059 double TimePerMetaChunkNominal[],
6060 double TimePerChromaMetaChunkNominal[],
6061 double TimePerMetaChunkVBlank[],
6062 double TimePerChromaMetaChunkVBlank[],
6063 double TimePerMetaChunkFlip[],
6064 double TimePerChromaMetaChunkFlip[],
6065 double time_per_pte_group_nom_luma[],
6066 double time_per_pte_group_vblank_luma[],
6067 double time_per_pte_group_flip_luma[],
6068 double time_per_pte_group_nom_chroma[],
6069 double time_per_pte_group_vblank_chroma[],
6070 double time_per_pte_group_flip_chroma[])
6072 unsigned int meta_chunk_width;
6073 unsigned int min_meta_chunk_width;
6074 unsigned int meta_chunk_per_row_int;
6075 unsigned int meta_row_remainder;
6076 unsigned int meta_chunk_threshold;
6077 unsigned int meta_chunks_per_row_ub;
6078 unsigned int meta_chunk_width_chroma;
6079 unsigned int min_meta_chunk_width_chroma;
6080 unsigned int meta_chunk_per_row_int_chroma;
6081 unsigned int meta_row_remainder_chroma;
6082 unsigned int meta_chunk_threshold_chroma;
6083 unsigned int meta_chunks_per_row_ub_chroma;
6084 unsigned int dpte_group_width_luma;
6085 unsigned int dpte_groups_per_row_luma_ub;
6086 unsigned int dpte_group_width_chroma;
6087 unsigned int dpte_groups_per_row_chroma_ub;
/* Pass 1: row heights divided by the vertical ratios. */
6090 for (k = 0; k < NumberOfActivePlanes; ++k) {
6091 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6092 if (BytePerPixelC[k] == 0) {
6093 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6095 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6097 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6098 if (BytePerPixelC[k] == 0) {
6099 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6101 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: time per meta chunk. */
6105 for (k = 0; k < NumberOfActivePlanes; ++k) {
6106 if (DCCEnable[k] == true) {
/*
 * Luma: derive the chunk width and minimum chunk width, then split the meta
 * row width into whole chunks plus a remainder.
 */
6107 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6108 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6109 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6110 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/* Threshold uses the meta request width when SourceScan[k] != dm_vert, the request height in the other form. */
6111 if (SourceScan[k] != dm_vert) {
6112 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6114 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Chunks per row: whole chunks + 1 when the remainder is within the threshold; the other form adds 2. */
6116 if (meta_row_remainder <= meta_chunk_threshold) {
6117 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6119 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Each time is (lines available) * HTotal / PixelClock, divided by the chunk count. */
6121 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6122 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6123 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
/* Chroma: zero without chroma bytes, otherwise the same computation with the chroma inputs. */
6124 if (BytePerPixelC[k] == 0) {
6125 TimePerChromaMetaChunkNominal[k] = 0;
6126 TimePerChromaMetaChunkVBlank[k] = 0;
6127 TimePerChromaMetaChunkFlip[k] = 0;
6129 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6130 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6131 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6132 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6133 if (SourceScan[k] != dm_vert) {
6134 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6136 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6138 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6139 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6141 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6143 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6144 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6145 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* Remaining case (presumably DCC disabled for this plane): all meta chunk times are 0. */
6148 TimePerMetaChunkNominal[k] = 0;
6149 TimePerMetaChunkVBlank[k] = 0;
6150 TimePerMetaChunkFlip[k] = 0;
6151 TimePerChromaMetaChunkNominal[k] = 0;
6152 TimePerChromaMetaChunkVBlank[k] = 0;
6153 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: time per PTE group. */
6157 for (k = 0; k < NumberOfActivePlanes; ++k) {
6158 if (GPUVMEnable == true) {
/* Group width: requests per group (group bytes / request size) times the request width, or the request height in the other form. */
6159 if (SourceScan[k] != dm_vert) {
6160 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6162 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row: row width / group width, passed through dml_ceil(..., 1). */
6164 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6165 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6166 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6167 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
/* Chroma: zero without chroma bytes, otherwise the same computation with the chroma inputs. */
6168 if (BytePerPixelC[k] == 0) {
6169 time_per_pte_group_nom_chroma[k] = 0;
6170 time_per_pte_group_vblank_chroma[k] = 0;
6171 time_per_pte_group_flip_chroma[k] = 0;
6173 if (SourceScan[k] != dm_vert) {
6174 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6176 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6178 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6179 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6180 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6181 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* Remaining case (presumably GPUVM disabled): all PTE group times are 0. */
6184 time_per_pte_group_nom_luma[k] = 0;
6185 time_per_pte_group_vblank_luma[k] = 0;
6186 time_per_pte_group_flip_luma[k] = 0;
6187 time_per_pte_group_nom_chroma[k] = 0;
6188 time_per_pte_group_vblank_chroma[k] = 0;
6189 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - fill the per-plane VM group / VM request
 * timing output arrays (vblank and immediate-flip variants).
 *
 * For each plane, when GPUVMEnable is true and either DCCEnable[k] is true or
 * GPUVMMaxPageTableLevels > 1, the function counts groups
 * (bytes / vm_group_bytes, via dml_ceil) and requests (bytes / 64) and
 * divides the available time (destination lines * HTotal / PixelClock) by
 * each count. All four outputs are zeroed in the remaining case.
 *
 * NOTE(review): GPUVMEnable, DCCEnable and the loop index k are used below
 * but their declarations are not visible in this view of the file; the
 * separators between paired alternative assignments are not visible either
 * (presumably "else" branches) - confirm against the full source.
 */
6194 static void CalculateVMGroupAndRequestTimes(
6195 unsigned int NumberOfActivePlanes,
6197 unsigned int GPUVMMaxPageTableLevels,
6198 unsigned int HTotal[],
6199 int BytePerPixelC[],
6200 double DestinationLinesToRequestVMInVBlank[],
6201 double DestinationLinesToRequestVMInImmediateFlip[],
6203 double PixelClock[],
6204 int dpte_row_width_luma_ub[],
6205 int dpte_row_width_chroma_ub[],
6206 int vm_group_bytes[],
6207 unsigned int dpde0_bytes_per_frame_ub_l[],
6208 unsigned int dpde0_bytes_per_frame_ub_c[],
6209 int meta_pte_bytes_per_frame_ub_l[],
6210 int meta_pte_bytes_per_frame_ub_c[],
6211 double TimePerVMGroupVBlank[],
6212 double TimePerVMGroupFlip[],
6213 double TimePerVMRequestVBlank[],
6214 double TimePerVMRequestFlip[])
6216 int num_group_per_lower_vm_stage;
6217 int num_req_per_lower_vm_stage;
6220 for (k = 0; k < NumberOfActivePlanes; ++k) {
6221 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count. Three visible sub-cases:
 *   - DCCEnable[k] == false: dpde0 bytes only;
 *   - GPUVMMaxPageTableLevels == 1: meta PTE bytes only;
 *   - last form: dpde0 + meta PTE bytes plus a constant (2 with chroma, 1 without).
 * Chroma terms are added only when BytePerPixelC[k] > 0.
 */
6222 if (DCCEnable[k] == false) {
6223 if (BytePerPixelC[k] > 0) {
6224 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6225 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6227 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6230 if (GPUVMMaxPageTableLevels == 1) {
6231 if (BytePerPixelC[k] > 0) {
6232 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6233 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6235 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6238 if (BytePerPixelC[k] > 0) {
6239 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6240 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6241 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6242 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6244 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6245 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Request count: same three sub-cases, each byte count divided by 64 (integer division). */
6250 if (DCCEnable[k] == false) {
6251 if (BytePerPixelC[k] > 0) {
6252 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6254 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6257 if (GPUVMMaxPageTableLevels == 1) {
6258 if (BytePerPixelC[k] > 0) {
6259 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6261 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6264 if (BytePerPixelC[k] > 0) {
6265 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6266 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6268 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Available time divided by the group count and by the request count. */
6273 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6274 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6275 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6276 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page table levels, all four times are halved. */
6278 if (GPUVMMaxPageTableLevels > 2) {
6279 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6280 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6281 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6282 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* Remaining case (presumably the outer condition is false): all four times are 0. */
6286 TimePerVMGroupVBlank[k] = 0;
6287 TimePerVMGroupFlip[k] = 0;
6288 TimePerVMRequestVBlank[k] = 0;
6289 TimePerVMRequestFlip[k] = 0;
6294 static void CalculateStutterEfficiency(
6295 struct display_mode_lib *mode_lib,
6296 int CompressedBufferSizeInkByte,
6297 bool UnboundedRequestEnabled,
6298 int ConfigReturnBufferSizeInKByte,
6299 int MetaFIFOSizeInKEntries,
6300 int ZeroSizeBufferEntries,
6301 int NumberOfActivePlanes,
6302 int ROBBufferSizeInKByte,
6303 double TotalDataReadBandwidth,
6306 double COMPBUF_RESERVED_SPACE_64B,
6307 double COMPBUF_RESERVED_SPACE_ZS,
6309 double SRExitZ8Time,
6310 bool SynchronizedVBlank,
6311 double Z8StutterEnterPlusExitWatermark,
6312 double StutterEnterPlusExitWatermark,
6313 bool ProgressiveToInterlaceUnitInOPP,
6315 double MinTTUVBlank[],
6317 unsigned int DETBufferSizeY[],
6318 int BytePerPixelY[],
6319 double BytePerPixelDETY[],
6320 double SwathWidthY[],
6323 double NetDCCRateLuma[],
6324 double NetDCCRateChroma[],
6325 double DCCFractionOfZeroSizeRequestsLuma[],
6326 double DCCFractionOfZeroSizeRequestsChroma[],
6329 double PixelClock[],
6331 enum scan_direction_class SourceScan[],
6332 int BlockHeight256BytesY[],
6333 int BlockWidth256BytesY[],
6334 int BlockHeight256BytesC[],
6335 int BlockWidth256BytesC[],
6336 int DCCYMaxUncompressedBlock[],
6337 int DCCCMaxUncompressedBlock[],
6340 bool WritebackEnable[],
6341 double ReadBandwidthPlaneLuma[],
6342 double ReadBandwidthPlaneChroma[],
6343 double meta_row_bw[],
6344 double dpte_row_bw[],
6345 double *StutterEfficiencyNotIncludingVBlank,
6346 double *StutterEfficiency,
6347 int *NumberOfStutterBurstsPerFrame,
6348 double *Z8StutterEfficiencyNotIncludingVBlank,
6349 double *Z8StutterEfficiency,
6350 int *Z8NumberOfStutterBurstsPerFrame,
6351 double *StutterPeriod)
6353 struct vba_vars_st *v = &mode_lib->vba;
6355 double DETBufferingTimeY;
6356 double SwathWidthYCriticalPlane = 0;
6357 double VActiveTimeCriticalPlane = 0;
6358 double FrameTimeCriticalPlane = 0;
6359 int BytePerPixelYCriticalPlane = 0;
6360 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6361 double MinTTUVBlankCriticalPlane = 0;
6362 double TotalCompressedReadBandwidth;
6363 double TotalRowReadBandwidth;
6364 double AverageDCCCompressionRate;
6365 double EffectiveCompressedBufferSize;
6366 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6367 double StutterBurstTime;
6368 int TotalActiveWriteback;
6370 double LinesInDETYRoundedDownToSwath;
6371 double MaximumEffectiveCompressionLuma;
6372 double MaximumEffectiveCompressionChroma;
6373 double TotalZeroSizeRequestReadBandwidth;
6374 double TotalZeroSizeCompressedReadBandwidth;
6375 double AverageDCCZeroSizeFraction;
6376 double AverageZeroSizeCompressionRate;
6377 int TotalNumberOfActiveOTG = 0;
6378 double LastStutterPeriod = 0.0;
6379 double LastZ8StutterPeriod = 0.0;
6382 TotalZeroSizeRequestReadBandwidth = 0;
6383 TotalZeroSizeCompressedReadBandwidth = 0;
6384 TotalRowReadBandwidth = 0;
6385 TotalCompressedReadBandwidth = 0;
6387 for (k = 0; k < NumberOfActivePlanes; ++k) {
6388 if (DCCEnable[k] == true) {
6389 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6390 || DCCYMaxUncompressedBlock[k] < 256) {
6391 MaximumEffectiveCompressionLuma = 2;
6393 MaximumEffectiveCompressionLuma = 4;
6395 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6396 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6397 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6398 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6399 if (ReadBandwidthPlaneChroma[k] > 0) {
6400 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6401 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6402 MaximumEffectiveCompressionChroma = 2;
6404 MaximumEffectiveCompressionChroma = 4;
6406 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6407 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6408 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6409 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6410 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6413 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6415 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6418 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6419 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6421 #ifdef __DML_VBA_DEBUG__
6422 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6423 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6424 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6425 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6426 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6427 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6428 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6429 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6432 if (AverageDCCZeroSizeFraction == 1) {
6433 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6434 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6435 } else if (AverageDCCZeroSizeFraction > 0) {
6436 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6437 EffectiveCompressedBufferSize = dml_min(
6438 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6439 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6440 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6441 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6442 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6444 "DML::%s: min 2 = %f\n",
6446 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6447 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6448 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6450 EffectiveCompressedBufferSize = dml_min(
6451 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6452 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6453 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6454 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6457 #ifdef __DML_VBA_DEBUG__
6458 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6459 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6460 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6464 for (k = 0; k < NumberOfActivePlanes; ++k) {
6465 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6466 / BytePerPixelDETY[k] / SwathWidthY[k];
6467 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6468 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6469 #ifdef __DML_VBA_DEBUG__
6470 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6471 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6472 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6473 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6474 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6475 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6476 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6477 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6478 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6479 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6480 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6481 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6484 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6485 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6487 *StutterPeriod = DETBufferingTimeY;
6488 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6489 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6490 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6491 SwathWidthYCriticalPlane = SwathWidthY[k];
6492 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6493 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6495 #ifdef __DML_VBA_DEBUG__
6496 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6497 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6498 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6499 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6500 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6501 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6502 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6507 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6508 #ifdef __DML_VBA_DEBUG__
6509 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6510 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6511 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6512 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6513 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6514 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6515 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6516 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6517 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6518 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6521 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6522 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6523 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6524 #ifdef __DML_VBA_DEBUG__
6525 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6526 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6527 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6528 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6529 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6531 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6534 "DML::%s: Time to finish residue swath=%f\n",
6536 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6538 TotalActiveWriteback = 0;
6539 for (k = 0; k < NumberOfActivePlanes; ++k) {
6540 if (WritebackEnable[k]) {
6541 TotalActiveWriteback = TotalActiveWriteback + 1;
6545 if (TotalActiveWriteback == 0) {
6546 #ifdef __DML_VBA_DEBUG__
6547 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6548 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6549 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6550 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6552 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6553 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6554 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6555 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6557 *StutterEfficiencyNotIncludingVBlank = 0.;
6558 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6559 *NumberOfStutterBurstsPerFrame = 0;
6560 *Z8NumberOfStutterBurstsPerFrame = 0;
6562 #ifdef __DML_VBA_DEBUG__
6563 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6564 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6565 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6566 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6567 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6570 for (k = 0; k < NumberOfActivePlanes; ++k) {
6571 if (v->BlendingAndTiming[k] == k) {
6572 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6576 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6577 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6579 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6580 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6581 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6583 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6586 *StutterEfficiency = 0;
6589 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6590 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6591 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6592 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6593 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6595 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6598 *Z8StutterEfficiency = 0.;
6601 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6602 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6603 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6604 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6605 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6606 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6607 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6608 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6611 static void CalculateSwathAndDETConfiguration(
6612 bool ForceSingleDPP,
6613 int NumberOfActivePlanes,
6614 unsigned int DETBufferSizeInKByte,
6615 double MaximumSwathWidthLuma[],
6616 double MaximumSwathWidthChroma[],
6617 enum scan_direction_class SourceScan[],
6618 enum source_format_class SourcePixelFormat[],
6619 enum dm_swizzle_mode SurfaceTiling[],
6620 int ViewportWidth[],
6621 int ViewportHeight[],
6622 int SurfaceWidthY[],
6623 int SurfaceWidthC[],
6624 int SurfaceHeightY[],
6625 int SurfaceHeightC[],
6626 int Read256BytesBlockHeightY[],
6627 int Read256BytesBlockHeightC[],
6628 int Read256BytesBlockWidthY[],
6629 int Read256BytesBlockWidthC[],
6630 enum odm_combine_mode ODMCombineEnabled[],
6631 int BlendingAndTiming[],
6634 double BytePerPixDETY[],
6635 double BytePerPixDETC[],
6638 double HRatioChroma[],
6640 int swath_width_luma_ub[],
6641 int swath_width_chroma_ub[],
6642 double SwathWidth[],
6643 double SwathWidthChroma[],
6646 unsigned int DETBufferSizeY[],
6647 unsigned int DETBufferSizeC[],
6648 bool ViewportSizeSupportPerPlane[],
6649 bool *ViewportSizeSupport)
6651 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6652 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6653 int MinimumSwathHeightY;
6654 int MinimumSwathHeightC;
6655 int RoundedUpMaxSwathSizeBytesY;
6656 int RoundedUpMaxSwathSizeBytesC;
6657 int RoundedUpMinSwathSizeBytesY;
6658 int RoundedUpMinSwathSizeBytesC;
6659 int RoundedUpSwathSizeBytesY;
6660 int RoundedUpSwathSizeBytesC;
6661 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6662 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6665 CalculateSwathWidth(
6667 NumberOfActivePlanes,
6679 Read256BytesBlockHeightY,
6680 Read256BytesBlockHeightC,
6681 Read256BytesBlockWidthY,
6682 Read256BytesBlockWidthC,
6687 SwathWidthSingleDPP,
6688 SwathWidthSingleDPPChroma,
6691 MaximumSwathHeightY,
6692 MaximumSwathHeightC,
6693 swath_width_luma_ub,
6694 swath_width_chroma_ub);
6696 *ViewportSizeSupport = true;
6697 for (k = 0; k < NumberOfActivePlanes; ++k) {
6698 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6699 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6700 if (SurfaceTiling[k] == dm_sw_linear
6701 || (SourcePixelFormat[k] == dm_444_64
6702 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6703 && SourceScan[k] != dm_vert)) {
6704 MinimumSwathHeightY = MaximumSwathHeightY[k];
6705 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6706 MinimumSwathHeightY = MaximumSwathHeightY[k];
6708 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6710 MinimumSwathHeightC = MaximumSwathHeightC[k];
6712 if (SurfaceTiling[k] == dm_sw_linear) {
6713 MinimumSwathHeightY = MaximumSwathHeightY[k];
6714 MinimumSwathHeightC = MaximumSwathHeightC[k];
6715 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6716 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6717 MinimumSwathHeightC = MaximumSwathHeightC[k];
6718 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6719 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6720 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6721 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6722 MinimumSwathHeightY = MaximumSwathHeightY[k];
6723 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6725 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6726 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6730 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6731 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6732 if (SourcePixelFormat[k] == dm_420_10) {
6733 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6734 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6736 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6737 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6738 if (SourcePixelFormat[k] == dm_420_10) {
6739 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6740 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6743 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6744 SwathHeightY[k] = MaximumSwathHeightY[k];
6745 SwathHeightC[k] = MaximumSwathHeightC[k];
6746 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6747 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6748 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6749 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6750 SwathHeightY[k] = MinimumSwathHeightY;
6751 SwathHeightC[k] = MaximumSwathHeightC[k];
6752 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6753 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6754 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6755 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6756 SwathHeightY[k] = MaximumSwathHeightY[k];
6757 SwathHeightC[k] = MinimumSwathHeightC;
6758 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6759 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6761 SwathHeightY[k] = MinimumSwathHeightY;
6762 SwathHeightC[k] = MinimumSwathHeightC;
6763 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6764 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6767 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6768 if (SwathHeightC[k] == 0) {
6769 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6770 DETBufferSizeC[k] = 0;
6771 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6772 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6773 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6775 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6776 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6779 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6780 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6781 *ViewportSizeSupport = false;
6782 ViewportSizeSupportPerPlane[k] = false;
6784 ViewportSizeSupportPerPlane[k] = true;
6790 static void CalculateSwathWidth(
6791 bool ForceSingleDPP,
6792 int NumberOfActivePlanes,
6793 enum source_format_class SourcePixelFormat[],
6794 enum scan_direction_class SourceScan[],
6795 int ViewportWidth[],
6796 int ViewportHeight[],
6797 int SurfaceWidthY[],
6798 int SurfaceWidthC[],
6799 int SurfaceHeightY[],
6800 int SurfaceHeightC[],
6801 enum odm_combine_mode ODMCombineEnabled[],
6804 int Read256BytesBlockHeightY[],
6805 int Read256BytesBlockHeightC[],
6806 int Read256BytesBlockWidthY[],
6807 int Read256BytesBlockWidthC[],
6808 int BlendingAndTiming[],
6812 double SwathWidthSingleDPPY[],
6813 double SwathWidthSingleDPPC[],
6814 double SwathWidthY[],
6815 double SwathWidthC[],
6816 int MaximumSwathHeightY[],
6817 int MaximumSwathHeightC[],
6818 int swath_width_luma_ub[],
6819 int swath_width_chroma_ub[])
6821 enum odm_combine_mode MainPlaneODMCombine;
6824 #ifdef __DML_VBA_DEBUG__
6825 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6828 for (k = 0; k < NumberOfActivePlanes; ++k) {
6829 if (SourceScan[k] != dm_vert) {
6830 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6832 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6835 #ifdef __DML_VBA_DEBUG__
6836 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6837 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6840 MainPlaneODMCombine = ODMCombineEnabled[k];
6841 for (j = 0; j < NumberOfActivePlanes; ++j) {
6842 if (BlendingAndTiming[k] == j) {
6843 MainPlaneODMCombine = ODMCombineEnabled[j];
6847 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6848 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6849 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6850 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6851 } else if (DPPPerPlane[k] == 2) {
6852 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6854 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6857 #ifdef __DML_VBA_DEBUG__
6858 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6859 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6862 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6863 SwathWidthC[k] = SwathWidthY[k] / 2;
6864 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6866 SwathWidthC[k] = SwathWidthY[k];
6867 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6870 if (ForceSingleDPP == true) {
6871 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6872 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6875 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6876 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6878 #ifdef __DML_VBA_DEBUG__
6879 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6882 if (SourceScan[k] != dm_vert) {
6883 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6884 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6885 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6886 if (BytePerPixC[k] > 0) {
6887 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6889 swath_width_chroma_ub[k] = dml_min(
6891 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6893 swath_width_chroma_ub[k] = 0;
6896 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6897 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6898 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6899 if (BytePerPixC[k] > 0) {
6900 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6902 swath_width_chroma_ub[k] = dml_min(
6903 surface_height_ub_c,
6904 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6906 swath_width_chroma_ub[k] = 0;
6913 static double CalculateExtraLatency(
6914 int RoundTripPingLatencyCycles,
6915 int ReorderingBytes,
6917 int TotalNumberOfActiveDPP,
6918 int PixelChunkSizeInKByte,
6919 int TotalNumberOfDCCActiveDPP,
6924 int NumberOfActivePlanes,
6926 int dpte_group_bytes[],
6927 double HostVMInefficiencyFactor,
6928 double HostVMMinPageSize,
6929 int HostVMMaxNonCachedPageTableLevels)
6931 double ExtraLatencyBytes;
6932 double ExtraLatency;
6934 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6936 TotalNumberOfActiveDPP,
6937 PixelChunkSizeInKByte,
6938 TotalNumberOfDCCActiveDPP,
6942 NumberOfActivePlanes,
6945 HostVMInefficiencyFactor,
6947 HostVMMaxNonCachedPageTableLevels);
6949 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6951 #ifdef __DML_VBA_DEBUG__
6952 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6953 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6954 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6955 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6956 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6959 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * The base amount is ReorderingBytes plus, scaled by 1024, one
 * PixelChunkSizeInKByte per active DPP and one MetaChunkSize per DCC-active
 * DPP. When GPUVMEnable is set, each plane adds
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 * HostVMInefficiencyFactor, where "levels" is non-zero only when HostVMEnable
 * is also set and is chosen from HostVMMaxNonCachedPageTableLevels by
 * HostVMMinPageSize:
 *   below 2048            -> the level count unchanged
 *   2048 up to 1048576    -> level count - 1, floored at 0
 *   1048576 and above     -> level count - 2, floored at 0
 *
 * Return: the accumulated byte count.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int DynamicLevels = 0;
	int RequestsPerGroup;
	double TotalBytes;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only contributes when GPUVM is on. */
	if (!GPUVMEnable)
		return TotalBytes;

	RequestsPerGroup = 1 + 8 * DynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * RequestsPerGroup * HostVMInefficiencyFactor;

	return TotalBytes;
}
/*
 * CalculateUrgentLatency - largest of the three urgent-latency inputs, with
 * an optional fabric-clock adjustment.
 *
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Return: the (possibly adjusted) urgent latency.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double UrgentLatency = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return UrgentLatency;

	UrgentLatency += UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	return UrgentLatency;
}
7020 static void UseMinimumDCFCLK(
7021 struct display_mode_lib *mode_lib,
7022 int MaxPrefetchMode,
7023 int ReorderingBytes)
7025 struct vba_vars_st *v = &mode_lib->vba;
7026 int dummy1, i, j, k;
7027 double NormalEfficiency, dummy2, dummy3;
7028 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7030 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7031 for (i = 0; i < v->soc.num_states; ++i) {
7032 for (j = 0; j <= 1; ++j) {
7033 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7034 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7035 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7036 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7037 double MinimumTWait;
7038 double NonDPTEBandwidth;
7039 double DPTEBandwidth;
7040 double DCFCLKRequiredForAverageBandwidth;
7041 double ExtraLatencyBytes;
7042 double ExtraLatencyCycles;
7043 double DCFCLKRequiredForPeakBandwidth;
7044 int NoOfDPPState[DC__NUM_DPP__MAX];
7045 double MinimumTvmPlus2Tr0;
7047 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7048 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7049 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7050 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7053 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7054 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7057 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7058 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7059 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7060 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7061 DCFCLKRequiredForAverageBandwidth = dml_max3(
7062 v->ProjectedDCFCLKDeepSleep[i][j],
7063 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7064 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7065 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7067 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7069 v->TotalNumberOfActiveDPP[i][j],
7070 v->PixelChunkSizeInKByte,
7071 v->TotalNumberOfDCCActiveDPP[i][j],
7075 v->NumberOfActivePlanes,
7077 v->dpte_group_bytes,
7079 v->HostVMMinPageSize,
7080 v->HostVMMaxNonCachedPageTableLevels);
7081 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7082 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7083 double DCFCLKCyclesRequiredInPrefetch;
7084 double ExpectedPrefetchBWAcceleration;
7085 double PrefetchTime;
7087 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7088 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7089 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7090 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7091 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7092 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7093 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7094 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7095 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7096 DynamicMetadataVMExtraLatency[k] =
7097 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7098 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7099 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7101 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7102 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7103 - DynamicMetadataVMExtraLatency[k];
7105 if (PrefetchTime > 0) {
7106 double ExpectedVRatioPrefetch;
7107 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7108 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7109 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7110 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7111 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7112 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7113 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7116 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7118 if (v->DynamicMetadataEnable[k] == true) {
7123 double AllowedTimeForUrgentExtraLatency;
7125 CalculateVupdateAndDynamicMetadataParameters(
7126 v->MaxInterDCNTileRepeaters,
7127 v->RequiredDPPCLK[i][j][k],
7128 v->RequiredDISPCLK[i][j],
7129 v->ProjectedDCFCLKDeepSleep[i][j],
7132 v->VTotal[k] - v->VActive[k],
7133 v->DynamicMetadataTransmittedBytes[k],
7134 v->DynamicMetadataLinesBeforeActiveRequired[k],
7136 v->ProgressiveToInterlaceUnitInOPP,
7144 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7145 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7146 if (AllowedTimeForUrgentExtraLatency > 0) {
7147 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7148 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7149 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7151 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7155 DCFCLKRequiredForPeakBandwidth = 0;
7156 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7157 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7159 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7160 * (v->GPUVMEnable == true ?
7161 (v->HostVMEnable == true ?
7162 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7164 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7165 double MaximumTvmPlus2Tr0PlusTsw;
7166 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7167 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7168 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7170 DCFCLKRequiredForPeakBandwidth = dml_max3(
7171 DCFCLKRequiredForPeakBandwidth,
7172 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7173 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7176 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize
 *
 * Stores the result of UnboundedRequest() in *UnboundedRequestEnabled, then
 * computes *CompressedBufferSizeInkByte as the configured return buffer size
 * minus (DPP count * rounded DET buffer size), scaled by
 * CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * Inputs as used by the body:
 *   DETBufferSizeInKByte                    - passed through dml_ceil(..., 64) before use
 *   ConfigReturnBufferSizeInKByte           - value the DET share is subtracted from
 *   UseUnboundedRequestingFinal,
 *   NoChromaPlanes, Output[0]               - forwarded unchanged to UnboundedRequest();
 *                                             only element 0 of Output is read
 *   CompressedBufferSegmentSizeInkByteFinal - multiplier applied before the final / 64
 *
 * Outputs:
 *   *UnboundedRequestEnabled     - return value of UnboundedRequest()
 *   *CompressedBufferSizeInkByte - computed size (see the two assignments below)
 *
 * NOTE(review): TotalActiveDPP and MaxNumDPP are used in the body but their
 * declarations are not among the parameter lines visible here - confirm
 * against the full prototype.
 */
7181 static void CalculateUnboundedRequestAndCompressedBufferSize(
7182 unsigned int DETBufferSizeInKByte,
7183 int ConfigReturnBufferSizeInKByte,
7184 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7186 bool NoChromaPlanes,
7188 int CompressedBufferSegmentSizeInkByteFinal,
7189 enum output_encoder_class *Output,
7190 bool *UnboundedRequestEnabled,
7191 int *CompressedBufferSizeInkByte)
// Presumably rounds the DET size up to a multiple of 64 - confirm against dml_ceil().
// The result is held as a double, which makes the subtraction below floating point.
7193 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7195 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The DPP count multiplied in depends on the flag just written: TotalActiveDPP
// when it is true, MaxNumDPP otherwise. The right-hand side is evaluated in
// double and converted to int when stored through the int pointer.
7196 *CompressedBufferSizeInkByte = (
7197 *UnboundedRequestEnabled == true ?
7198 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7199 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// The visible operands here are int (see NOTE above for the unseen declarations),
// so this is an integer multiply followed by an integer divide by 64.
7200 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug dump of the inputs and results, guarded by __DML_VBA_DEBUG__.
7202 #ifdef __DML_VBA_DEBUG__
7203 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7204 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7205 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7206 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7207 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7208 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7209 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7213 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7215 bool ret_val = false;
7217 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7218 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {