1 // SPDX-License-Identifier: MIT
3 * Copyright 2022 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
31 #include "../display_mode_lib.h"
32 #include "display_mode_vba_314.h"
33 #include "../dml_inline_defs.h"
37 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
40 * ways. Unless there is something clearly wrong with it the code should
41 * remain as-is as it provides us with a guarantee from HW that it is correct.
45 #define BPP_BLENDED_PIPE 0xffffffff
46 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
47 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
49 // For DML-C changes that haven't been propagated to VBA yet
50 //#define __DML_VBA_ALLOW_DELTA__
52 // Move these to ip parameters/constant
54 // The vstartup value at which DML starts checking whether the mode can be supported
55 #define __DML_VBA_MIN_VSTARTUP__ 9
57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
60 // fudge factor for min dcfclk calculation
61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
// Per-pipe parameter fields. The enclosing struct header and footer are not
// visible in this chunk. CalculatePrefetchSchedule() further down reads
// members with these names through its `myPipe` pointer, so this is
// presumably the type behind that pointer -- TODO confirm.
67 double DCFCLKDeepSleep; // passed on to CalculateVupdateAndDynamicMetadataParameters()
68 unsigned int DPPPerPlane; // divides the per-pixel prefetch bandwidth; (DPPPerPlane - 1) scales DPP_RECOUT_WIDTH
72 enum scan_direction_class SourceScan;
73 unsigned int BlockWidth256BytesY;
74 unsigned int BlockHeight256BytesY;
75 unsigned int BlockWidth256BytesC;
76 unsigned int BlockHeight256BytesC;
77 unsigned int InterlaceEnable; // together with ProgressiveToInterlaceUnitInOPP selects DSTYAfterScaler = 1
78 unsigned int NumberOfCursors; // multiplies DPPCLKDelayCNVCCursor when accumulating DPPCycles
81 unsigned int DCCEnable;
82 bool ODMCombineIsEnabled; // adds 18 to DSTXAfterScaler when set
83 enum source_format_class SourcePixelFormat; // compared against dm_420_8 / dm_420_10 / dm_420_12 to pick the bytes_pp formula
86 bool ProgressiveToInterlaceUnitInOPP;
90 #define BPP_BLENDED_PIPE 0xffffffff
// Forward declarations of file-local (static) helpers.
// DisplayPipeConfiguration() and
// DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
// are called from dml314_recalculate(); dscceComputeDelay() and
// dscComputeDelay() are defined further down in this file.

// NOTE(review): the pointer parameters are presumably outputs -- the
// definition is not visible in this chunk, confirm there.
92 static bool CalculateBytePerPixelAnd256BBlockSizes(
93 enum source_format_class SourcePixelFormat,
94 enum dm_swizzle_mode SurfaceTiling,
95 unsigned int *BytePerPixelY,
96 unsigned int *BytePerPixelC,
97 double *BytePerPixelDETY,
98 double *BytePerPixelDETC,
99 unsigned int *BlockHeight256BytesY,
100 unsigned int *BlockHeight256BytesC,
101 unsigned int *BlockWidth256BytesY,
102 unsigned int *BlockWidth256BytesC);
103 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
104 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
// DSC delay helpers; both take the output pixel format and encoder class.
105 static unsigned int dscceComputeDelay(
108 unsigned int sliceWidth,
109 unsigned int numSlices,
110 enum output_format_class pixelFormat,
111 enum output_encoder_class Output);
112 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
113 static bool CalculatePrefetchSchedule(
114 struct display_mode_lib *mode_lib,
115 double HostVMInefficiencyFactor,
117 unsigned int DSCDelay,
118 double DPPCLKDelaySubtotalPlusCNVCFormater,
119 double DPPCLKDelaySCL,
120 double DPPCLKDelaySCLLBOnly,
121 double DPPCLKDelayCNVCCursor,
122 double DISPCLKDelaySubtotal,
123 unsigned int DPP_RECOUT_WIDTH,
124 enum output_format_class OutputFormat,
125 unsigned int MaxInterDCNTileRepeaters,
126 unsigned int VStartup,
127 unsigned int MaxVStartup,
128 unsigned int GPUVMPageTableLevels,
131 unsigned int HostVMMaxNonCachedPageTableLevels,
132 double HostVMMinPageSize,
133 bool DynamicMetadataEnable,
134 bool DynamicMetadataVMEnabled,
135 int DynamicMetadataLinesBeforeActiveRequired,
136 unsigned int DynamicMetadataTransmittedBytes,
137 double UrgentLatency,
138 double UrgentExtraLatency,
140 unsigned int PDEAndMetaPTEBytesFrame,
141 unsigned int MetaRowByte,
142 unsigned int PixelPTEBytesPerRow,
143 double PrefetchSourceLinesY,
144 unsigned int SwathWidthY,
145 double VInitPreFillY,
146 unsigned int MaxNumSwathY,
147 double PrefetchSourceLinesC,
148 unsigned int SwathWidthC,
149 double VInitPreFillC,
150 unsigned int MaxNumSwathC,
151 int swath_width_luma_ub,
152 int swath_width_chroma_ub,
153 unsigned int SwathHeightY,
154 unsigned int SwathHeightC,
156 double *DSTXAfterScaler,
157 double *DSTYAfterScaler,
158 double *DestinationLinesForPrefetch,
159 double *PrefetchBandwidth,
160 double *DestinationLinesToRequestVMInVBlank,
161 double *DestinationLinesToRequestRowInVBlank,
162 double *VRatioPrefetchY,
163 double *VRatioPrefetchC,
164 double *RequiredPrefetchPixDataBWLuma,
165 double *RequiredPrefetchPixDataBWChroma,
166 bool *NotEnoughTimeForDynamicMetadata,
168 double *prefetch_vmrow_bw,
172 int *VUpdateOffsetPix,
173 double *VUpdateWidthPix,
174 double *VReadyOffsetPix);
175 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
176 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
177 static void CalculateDCCConfiguration(
179 bool DCCProgrammingAssumesScanDirectionUnknown,
180 enum source_format_class SourcePixelFormat,
181 unsigned int SurfaceWidthLuma,
182 unsigned int SurfaceWidthChroma,
183 unsigned int SurfaceHeightLuma,
184 unsigned int SurfaceHeightChroma,
185 double DETBufferSize,
186 unsigned int RequestHeight256ByteLuma,
187 unsigned int RequestHeight256ByteChroma,
188 enum dm_swizzle_mode TilingFormat,
189 unsigned int BytePerPixelY,
190 unsigned int BytePerPixelC,
191 double BytePerPixelDETY,
192 double BytePerPixelDETC,
193 enum scan_direction_class ScanOrientation,
194 unsigned int *MaxUncompressedBlockLuma,
195 unsigned int *MaxUncompressedBlockChroma,
196 unsigned int *MaxCompressedBlockLuma,
197 unsigned int *MaxCompressedBlockChroma,
198 unsigned int *IndependentBlockLuma,
199 unsigned int *IndependentBlockChroma);
200 static double CalculatePrefetchSourceLines(
201 struct display_mode_lib *mode_lib,
205 bool ProgressiveToInterlaceUnitInOPP,
206 unsigned int SwathHeight,
207 unsigned int ViewportYStart,
208 double *VInitPreFill,
209 unsigned int *MaxNumSwath);
210 static unsigned int CalculateVMAndRowBytes(
211 struct display_mode_lib *mode_lib,
213 unsigned int BlockHeight256Bytes,
214 unsigned int BlockWidth256Bytes,
215 enum source_format_class SourcePixelFormat,
216 unsigned int SurfaceTiling,
217 unsigned int BytePerPixel,
218 enum scan_direction_class ScanDirection,
219 unsigned int SwathWidth,
220 unsigned int ViewportHeight,
223 unsigned int HostVMMaxNonCachedPageTableLevels,
224 unsigned int GPUVMMinPageSize,
225 unsigned int HostVMMinPageSize,
226 unsigned int PTEBufferSizeInRequests,
228 unsigned int DCCMetaPitch,
229 unsigned int *MacroTileWidth,
230 unsigned int *MetaRowByte,
231 unsigned int *PixelPTEBytesPerRow,
232 bool *PTEBufferSizeNotExceeded,
233 int *dpte_row_width_ub,
234 unsigned int *dpte_row_height,
235 unsigned int *MetaRequestWidth,
236 unsigned int *MetaRequestHeight,
237 unsigned int *meta_row_width,
238 unsigned int *meta_row_height,
240 unsigned int *dpte_group_bytes,
241 unsigned int *PixelPTEReqWidth,
242 unsigned int *PixelPTEReqHeight,
243 unsigned int *PTERequestSize,
244 int *DPDE0BytesFrame,
245 int *MetaPTEBytesFrame);
246 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
247 static void CalculateRowBandwidth(
249 enum source_format_class SourcePixelFormat,
254 unsigned int MetaRowByteLuma,
255 unsigned int MetaRowByteChroma,
256 unsigned int meta_row_height_luma,
257 unsigned int meta_row_height_chroma,
258 unsigned int PixelPTEBytesPerRowLuma,
259 unsigned int PixelPTEBytesPerRowChroma,
260 unsigned int dpte_row_height_luma,
261 unsigned int dpte_row_height_chroma,
263 double *dpte_row_bw);
265 static void CalculateFlipSchedule(
266 struct display_mode_lib *mode_lib,
268 double HostVMInefficiencyFactor,
269 double UrgentExtraLatency,
270 double UrgentLatency,
271 double PDEAndMetaPTEBytesPerFrame,
273 double DPTEBytesPerRow);
274 static double CalculateWriteBackDelay(
275 enum source_format_class WritebackPixelFormat,
276 double WritebackHRatio,
277 double WritebackVRatio,
278 unsigned int WritebackVTaps,
279 int WritebackDestinationWidth,
280 int WritebackDestinationHeight,
281 int WritebackSourceHeight,
282 unsigned int HTotal);
284 static void CalculateVupdateAndDynamicMetadataParameters(
285 int MaxInterDCNTileRepeaters,
288 double DCFClkDeepSleep,
292 int DynamicMetadataTransmittedBytes,
293 int DynamicMetadataLinesBeforeActiveRequired,
295 bool ProgressiveToInterlaceUnitInOPP,
300 int *VUpdateOffsetPix,
301 double *VUpdateWidthPix,
302 double *VReadyOffsetPix);
304 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
305 struct display_mode_lib *mode_lib,
306 unsigned int PrefetchMode,
309 double UrgentLatency,
312 double DCFCLKDeepSleep,
313 unsigned int DETBufferSizeY[],
314 unsigned int DETBufferSizeC[],
315 unsigned int SwathHeightY[],
316 unsigned int SwathHeightC[],
317 double SwathWidthY[],
318 double SwathWidthC[],
319 unsigned int DPPPerPlane[],
320 double BytePerPixelDETY[],
321 double BytePerPixelDETC[],
322 bool UnboundedRequestEnabled,
323 unsigned int CompressedBufferSizeInkByte,
324 enum clock_change_support *DRAMClockChangeSupport,
325 double *StutterExitWatermark,
326 double *StutterEnterPlusExitWatermark,
327 double *Z8StutterExitWatermark,
328 double *Z8StutterEnterPlusExitWatermark);
330 static void CalculateDCFCLKDeepSleep(
331 struct display_mode_lib *mode_lib,
332 unsigned int NumberOfActivePlanes,
336 double VRatioChroma[],
337 double SwathWidthY[],
338 double SwathWidthC[],
339 unsigned int DPPPerPlane[],
341 double HRatioChroma[],
343 double PSCL_THROUGHPUT[],
344 double PSCL_THROUGHPUT_CHROMA[],
346 double ReadBandwidthLuma[],
347 double ReadBandwidthChroma[],
349 double *DCFCLKDeepSleep);
351 static void CalculateUrgentBurstFactor(
352 int swath_width_luma_ub,
353 int swath_width_chroma_ub,
354 unsigned int SwathHeightY,
355 unsigned int SwathHeightC,
357 double UrgentLatency,
358 double CursorBufferSize,
359 unsigned int CursorWidth,
360 unsigned int CursorBPP,
363 double BytePerPixelInDETY,
364 double BytePerPixelInDETC,
365 double DETBufferSizeY,
366 double DETBufferSizeC,
367 double *UrgentBurstFactorCursor,
368 double *UrgentBurstFactorLuma,
369 double *UrgentBurstFactorChroma,
370 bool *NotEnoughUrgentLatencyHiding);
372 static void UseMinimumDCFCLK(
373 struct display_mode_lib *mode_lib,
375 int ReorderingBytes);
377 static void CalculatePixelDeliveryTimes(
378 unsigned int NumberOfActivePlanes,
380 double VRatioChroma[],
381 double VRatioPrefetchY[],
382 double VRatioPrefetchC[],
383 unsigned int swath_width_luma_ub[],
384 unsigned int swath_width_chroma_ub[],
385 unsigned int DPPPerPlane[],
387 double HRatioChroma[],
389 double PSCL_THROUGHPUT[],
390 double PSCL_THROUGHPUT_CHROMA[],
393 enum scan_direction_class SourceScan[],
394 unsigned int NumberOfCursors[],
395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
397 unsigned int BlockWidth256BytesY[],
398 unsigned int BlockHeight256BytesY[],
399 unsigned int BlockWidth256BytesC[],
400 unsigned int BlockHeight256BytesC[],
401 double DisplayPipeLineDeliveryTimeLuma[],
402 double DisplayPipeLineDeliveryTimeChroma[],
403 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
404 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeLuma[],
406 double DisplayPipeRequestDeliveryTimeChroma[],
407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
409 double CursorRequestDeliveryTime[],
410 double CursorRequestDeliveryTimePrefetch[]);
412 static void CalculateMetaAndPTETimes(
413 int NumberOfActivePlanes,
416 int MinMetaChunkSizeBytes,
419 double VRatioChroma[],
420 double DestinationLinesToRequestRowInVBlank[],
421 double DestinationLinesToRequestRowInImmediateFlip[],
426 enum scan_direction_class SourceScan[],
427 int dpte_row_height[],
428 int dpte_row_height_chroma[],
429 int meta_row_width[],
430 int meta_row_width_chroma[],
431 int meta_row_height[],
432 int meta_row_height_chroma[],
433 int meta_req_width[],
434 int meta_req_width_chroma[],
435 int meta_req_height[],
436 int meta_req_height_chroma[],
437 int dpte_group_bytes[],
438 int PTERequestSizeY[],
439 int PTERequestSizeC[],
440 int PixelPTEReqWidthY[],
441 int PixelPTEReqHeightY[],
442 int PixelPTEReqWidthC[],
443 int PixelPTEReqHeightC[],
444 int dpte_row_width_luma_ub[],
445 int dpte_row_width_chroma_ub[],
446 double DST_Y_PER_PTE_ROW_NOM_L[],
447 double DST_Y_PER_PTE_ROW_NOM_C[],
448 double DST_Y_PER_META_ROW_NOM_L[],
449 double DST_Y_PER_META_ROW_NOM_C[],
450 double TimePerMetaChunkNominal[],
451 double TimePerChromaMetaChunkNominal[],
452 double TimePerMetaChunkVBlank[],
453 double TimePerChromaMetaChunkVBlank[],
454 double TimePerMetaChunkFlip[],
455 double TimePerChromaMetaChunkFlip[],
456 double time_per_pte_group_nom_luma[],
457 double time_per_pte_group_vblank_luma[],
458 double time_per_pte_group_flip_luma[],
459 double time_per_pte_group_nom_chroma[],
460 double time_per_pte_group_vblank_chroma[],
461 double time_per_pte_group_flip_chroma[]);
463 static void CalculateVMGroupAndRequestTimes(
464 unsigned int NumberOfActivePlanes,
466 unsigned int GPUVMMaxPageTableLevels,
467 unsigned int HTotal[],
469 double DestinationLinesToRequestVMInVBlank[],
470 double DestinationLinesToRequestVMInImmediateFlip[],
473 int dpte_row_width_luma_ub[],
474 int dpte_row_width_chroma_ub[],
475 int vm_group_bytes[],
476 unsigned int dpde0_bytes_per_frame_ub_l[],
477 unsigned int dpde0_bytes_per_frame_ub_c[],
478 int meta_pte_bytes_per_frame_ub_l[],
479 int meta_pte_bytes_per_frame_ub_c[],
480 double TimePerVMGroupVBlank[],
481 double TimePerVMGroupFlip[],
482 double TimePerVMRequestVBlank[],
483 double TimePerVMRequestFlip[]);
485 static void CalculateStutterEfficiency(
486 struct display_mode_lib *mode_lib,
487 int CompressedBufferSizeInkByte,
488 bool UnboundedRequestEnabled,
489 int ConfigReturnBufferSizeInKByte,
490 int MetaFIFOSizeInKEntries,
491 int ZeroSizeBufferEntries,
492 int NumberOfActivePlanes,
493 int ROBBufferSizeInKByte,
494 double TotalDataReadBandwidth,
497 double COMPBUF_RESERVED_SPACE_64B,
498 double COMPBUF_RESERVED_SPACE_ZS,
501 bool SynchronizedVBlank,
502 double Z8StutterEnterPlusExitWatermark,
503 double StutterEnterPlusExitWatermark,
504 bool ProgressiveToInterlaceUnitInOPP,
506 double MinTTUVBlank[],
508 unsigned int DETBufferSizeY[],
510 double BytePerPixelDETY[],
511 double SwathWidthY[],
514 double NetDCCRateLuma[],
515 double NetDCCRateChroma[],
516 double DCCFractionOfZeroSizeRequestsLuma[],
517 double DCCFractionOfZeroSizeRequestsChroma[],
522 enum scan_direction_class SourceScan[],
523 int BlockHeight256BytesY[],
524 int BlockWidth256BytesY[],
525 int BlockHeight256BytesC[],
526 int BlockWidth256BytesC[],
527 int DCCYMaxUncompressedBlock[],
528 int DCCCMaxUncompressedBlock[],
531 bool WritebackEnable[],
532 double ReadBandwidthPlaneLuma[],
533 double ReadBandwidthPlaneChroma[],
534 double meta_row_bw[],
535 double dpte_row_bw[],
536 double *StutterEfficiencyNotIncludingVBlank,
537 double *StutterEfficiency,
538 int *NumberOfStutterBurstsPerFrame,
539 double *Z8StutterEfficiencyNotIncludingVBlank,
540 double *Z8StutterEfficiency,
541 int *Z8NumberOfStutterBurstsPerFrame,
542 double *StutterPeriod);
544 static void CalculateSwathAndDETConfiguration(
546 int NumberOfActivePlanes,
547 unsigned int DETBufferSizeInKByte,
548 double MaximumSwathWidthLuma[],
549 double MaximumSwathWidthChroma[],
550 enum scan_direction_class SourceScan[],
551 enum source_format_class SourcePixelFormat[],
552 enum dm_swizzle_mode SurfaceTiling[],
554 int ViewportHeight[],
557 int SurfaceHeightY[],
558 int SurfaceHeightC[],
559 int Read256BytesBlockHeightY[],
560 int Read256BytesBlockHeightC[],
561 int Read256BytesBlockWidthY[],
562 int Read256BytesBlockWidthC[],
563 enum odm_combine_mode ODMCombineEnabled[],
564 int BlendingAndTiming[],
567 double BytePerPixDETY[],
568 double BytePerPixDETC[],
571 double HRatioChroma[],
573 int swath_width_luma_ub[],
574 int swath_width_chroma_ub[],
576 double SwathWidthChroma[],
579 unsigned int DETBufferSizeY[],
580 unsigned int DETBufferSizeC[],
581 bool ViewportSizeSupportPerPlane[],
582 bool *ViewportSizeSupport);
583 static void CalculateSwathWidth(
585 int NumberOfActivePlanes,
586 enum source_format_class SourcePixelFormat[],
587 enum scan_direction_class SourceScan[],
589 int ViewportHeight[],
592 int SurfaceHeightY[],
593 int SurfaceHeightC[],
594 enum odm_combine_mode ODMCombineEnabled[],
597 int Read256BytesBlockHeightY[],
598 int Read256BytesBlockHeightC[],
599 int Read256BytesBlockWidthY[],
600 int Read256BytesBlockWidthC[],
601 int BlendingAndTiming[],
605 double SwathWidthSingleDPPY[],
606 double SwathWidthSingleDPPC[],
607 double SwathWidthY[],
608 double SwathWidthC[],
609 int MaximumSwathHeightY[],
610 int MaximumSwathHeightC[],
611 int swath_width_luma_ub[],
612 int swath_width_chroma_ub[]);
614 static double CalculateExtraLatency(
615 int RoundTripPingLatencyCycles,
618 int TotalNumberOfActiveDPP,
619 int PixelChunkSizeInKByte,
620 int TotalNumberOfDCCActiveDPP,
625 int NumberOfActivePlanes,
627 int dpte_group_bytes[],
628 double HostVMInefficiencyFactor,
629 double HostVMMinPageSize,
630 int HostVMMaxNonCachedPageTableLevels);
632 static double CalculateExtraLatencyBytes(
634 int TotalNumberOfActiveDPP,
635 int PixelChunkSizeInKByte,
636 int TotalNumberOfDCCActiveDPP,
640 int NumberOfActivePlanes,
642 int dpte_group_bytes[],
643 double HostVMInefficiencyFactor,
644 double HostVMMinPageSize,
645 int HostVMMaxNonCachedPageTableLevels);
647 static double CalculateUrgentLatency(
648 double UrgentLatencyPixelDataOnly,
649 double UrgentLatencyPixelMixedWithVMData,
650 double UrgentLatencyVMDataOnly,
651 bool DoUrgentLatencyAdjustment,
652 double UrgentLatencyAdjustmentFabricClockComponent,
653 double UrgentLatencyAdjustmentFabricClockReference,
654 double FabricClockSingle);
656 static void CalculateUnboundedRequestAndCompressedBufferSize(
657 unsigned int DETBufferSizeInKByte,
658 int ConfigReturnBufferSizeInKByte,
659 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
663 int CompressedBufferSegmentSizeInkByteFinal,
664 enum output_encoder_class *Output,
665 bool *UnboundedRequestEnabled,
666 int *CompressedBufferSizeInkByte);
668 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
669 static unsigned int CalculateMaxVStartup(
671 unsigned int VActive,
672 unsigned int VBlankNom,
675 bool ProgressiveTointerlaceUnitinOPP,
677 unsigned int VBlankNomDefaultUS,
678 double WritebackDelayTime);
// Public entry point of this file. Calls, in order and each with the same
// mode_lib pointer:
//   1. ModeSupportAndSystemConfiguration()
//   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
//   3. DisplayPipeConfiguration()
//   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
// Nothing is returned; any results are presumably written into *mode_lib by
// the callees (their bodies are not visible in this chunk).
680 void dml314_recalculate(struct display_mode_lib *mode_lib)
682 ModeSupportAndSystemConfiguration(mode_lib);
683 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
684 DisplayPipeConfiguration(mode_lib);
// Trace message emitted only in builds that define __DML_VBA_DEBUG__.
685 #ifdef __DML_VBA_DEBUG__
686 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
688 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
// Computes the DSC encoder (dscce) delay. The intermediate `Delay` is in
// cycles and is converted to pixels at the end (Delay * 3 * pixelsPerClock);
// `pixels` is presumably the returned value -- the return statement is not
// visible in this chunk.
// NOTE(review): the body uses `BPP`, which is not among the parameter lines
// visible here; several parameter and assignment lines are missing from this
// view, so check the full definition before changing anything.
691 static unsigned int dscceComputeDelay(
694 unsigned int sliceWidth,
695 unsigned int numSlices,
696 enum output_format_class pixelFormat,
697 enum output_encoder_class Output)
699 // valid bpc = source bits per component in the set of {8, 10, 12}
700 // valid bpp = increments of 1/16 of a bit
701 // min = 6/7/8 in N420/N422/444, respectively
702 // max = such that compression is 1:1
703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
704 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
708 unsigned int rcModelSize = 8192;
710 // N422/N420 operate at 2 pixels per clock
// pixelsPerClock starts at 0 and is expected to be set by the format chain
// below (the assignments themselves are not visible in this chunk).
711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
713 if (pixelFormat == dm_420)
715 else if (pixelFormat == dm_444)
717 else if (pixelFormat == dm_n422)
719 // #all other modes operate at 1 pixel per clock
723 //initial transmit delay as per PPS
// Half the rate-control model size divided by BPP and pixels per clock,
// rounded with dml_round().
724 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
734 //divide by pixel per cycle to compute slice width as seen by DSC
// Unsigned integer division: any remainder is truncated.
735 w = sliceWidth / pixelsPerClock;
737 //422 mode has an additional cycle of delay
// NOTE(review): the condition below tests dm_420, dm_444 and dm_n422, which
// is wider than the "422 mode" wording above; the values assigned in each
// branch are not visible in this chunk -- confirm the intent.
738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
743 //main calculation for the dscce
744 ix = initalXmitDelay + 45;
749 ax = (a + 2) / 3 + D + 6 + 1;
// Ceiling division of ax by wx.
750 L = (ax + wx - 1) / wx;
// NOTE(review): `ix % w` is undefined if w is 0 (sliceWidth smaller than
// pixelsPerClock); presumably callers guarantee this cannot happen.
751 if ((ix % w) == 0 && P != 0)
755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
758 pixels = Delay * 3 * pixelsPerClock;
// Builds a DSC delay in `Delay`, starting from 0, with three separate
// sequences: one for dm_420, one for dm_n422, and one for every other pixel
// format. The stage comments below name what each step accounts for; the
// statements that add each stage's contribution, and the final return, are
// not visible in this chunk.
// NOTE(review): `Output` is not referenced on any line visible here.
762 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
764 unsigned int Delay = 0;
766 if (pixelFormat == dm_420) {
771 // dscc - input deserializer
773 // dscc gets pixels every other cycle
775 // dscc - input cdc fifo
777 // dscc gets pixels every other cycle
779 // dscc - cdc uncertainty
781 // dscc - output cdc fifo
783 // dscc gets pixels every other cycle
785 // dscc - cdc uncertainty
787 // dscc - output serializer
791 } else if (pixelFormat == dm_n422) {
796 // dscc - input deserializer
798 // dscc - input cdc fifo
800 // dscc - cdc uncertainty
802 // dscc - output cdc fifo
804 // dscc - cdc uncertainty
806 // dscc - output serializer
// Remaining formats (neither dm_420 nor dm_n422). Note that the last two
// stages are listed in the opposite order from the branches above.
815 // dscc - input deserializer
817 // dscc - input cdc fifo
819 // dscc - cdc uncertainty
821 // dscc - output cdc fifo
823 // dscc - output serializer
825 // dscc - cdc uncertainty
834 static bool CalculatePrefetchSchedule(
835 struct display_mode_lib *mode_lib,
836 double HostVMInefficiencyFactor,
838 unsigned int DSCDelay,
839 double DPPCLKDelaySubtotalPlusCNVCFormater,
840 double DPPCLKDelaySCL,
841 double DPPCLKDelaySCLLBOnly,
842 double DPPCLKDelayCNVCCursor,
843 double DISPCLKDelaySubtotal,
844 unsigned int DPP_RECOUT_WIDTH,
845 enum output_format_class OutputFormat,
846 unsigned int MaxInterDCNTileRepeaters,
847 unsigned int VStartup,
848 unsigned int MaxVStartup,
849 unsigned int GPUVMPageTableLevels,
852 unsigned int HostVMMaxNonCachedPageTableLevels,
853 double HostVMMinPageSize,
854 bool DynamicMetadataEnable,
855 bool DynamicMetadataVMEnabled,
856 int DynamicMetadataLinesBeforeActiveRequired,
857 unsigned int DynamicMetadataTransmittedBytes,
858 double UrgentLatency,
859 double UrgentExtraLatency,
861 unsigned int PDEAndMetaPTEBytesFrame,
862 unsigned int MetaRowByte,
863 unsigned int PixelPTEBytesPerRow,
864 double PrefetchSourceLinesY,
865 unsigned int SwathWidthY,
866 double VInitPreFillY,
867 unsigned int MaxNumSwathY,
868 double PrefetchSourceLinesC,
869 unsigned int SwathWidthC,
870 double VInitPreFillC,
871 unsigned int MaxNumSwathC,
872 int swath_width_luma_ub,
873 int swath_width_chroma_ub,
874 unsigned int SwathHeightY,
875 unsigned int SwathHeightC,
877 double *DSTXAfterScaler,
878 double *DSTYAfterScaler,
879 double *DestinationLinesForPrefetch,
880 double *PrefetchBandwidth,
881 double *DestinationLinesToRequestVMInVBlank,
882 double *DestinationLinesToRequestRowInVBlank,
883 double *VRatioPrefetchY,
884 double *VRatioPrefetchC,
885 double *RequiredPrefetchPixDataBWLuma,
886 double *RequiredPrefetchPixDataBWChroma,
887 bool *NotEnoughTimeForDynamicMetadata,
889 double *prefetch_vmrow_bw,
893 int *VUpdateOffsetPix,
894 double *VUpdateWidthPix,
895 double *VReadyOffsetPix)
897 bool MyError = false;
898 unsigned int DPPCycles, DISPCLKCycles;
899 double DSTTotalPixelsAfterScaler;
901 double dst_y_prefetch_equ;
902 #ifdef __DML_VBA_DEBUG__
905 double prefetch_bw_oto;
906 double prefetch_bw_pr;
909 double Tvm_oto_lines;
910 double Tr0_oto_lines;
911 double dst_y_prefetch_oto;
912 double TimeForFetchingMetaPTE = 0;
913 double TimeForFetchingRowInVBlank = 0;
914 double LinesToRequestPrefetchPixelData = 0;
915 unsigned int HostVMDynamicLevelsTrips;
919 double Tvm_trips_rounded;
920 double Tr0_trips_rounded;
923 double prefetch_bw_equ;
929 double prefetch_sw_bytes;
932 int max_vratio_pre = 4;
938 if (GPUVMEnable == true && HostVMEnable == true) {
939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
941 HostVMDynamicLevelsTrips = 0;
943 #ifdef __DML_VBA_DEBUG__
944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
946 CalculateVupdateAndDynamicMetadataParameters(
947 MaxInterDCNTileRepeaters,
950 myPipe->DCFCLKDeepSleep,
954 DynamicMetadataTransmittedBytes,
955 DynamicMetadataLinesBeforeActiveRequired,
956 myPipe->InterlaceEnable,
957 myPipe->ProgressiveToInterlaceUnitInOPP,
966 LineTime = myPipe->HTotal / myPipe->PixelClock;
967 trip_to_mem = UrgentLatency;
968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
970 #ifdef __DML_VBA_ALLOW_DELTA__
971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
973 if (DynamicMetadataVMEnabled == true) {
975 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
977 *Tdmdl = TWait + UrgentExtraLatency;
980 #ifdef __DML_VBA_ALLOW_DELTA__
981 if (DynamicMetadataEnable == false) {
986 if (DynamicMetadataEnable == true) {
987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
988 *NotEnoughTimeForDynamicMetadata = true;
989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
995 *NotEnoughTimeForDynamicMetadata = false;
998 *NotEnoughTimeForDynamicMetadata = false;
1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1003 if (myPipe->ScalerEnabled)
1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1010 DISPCLKCycles = DISPCLKDelaySubtotal;
1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1017 #ifdef __DML_VBA_DEBUG__
1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1031 *DSTYAfterScaler = 1;
1033 *DSTYAfterScaler = 0;
1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1036 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1039 #ifdef __DML_VBA_DEBUG__
1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1046 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1047 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1049 #ifdef __DML_VBA_ALLOW_DELTA__
1050 if (!myPipe->DCCEnable) {
1052 Tr0_trips_rounded = 0.0;
1058 Tvm_trips_rounded = 0.0;
1062 if (GPUVMPageTableLevels >= 3) {
1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1067 } else if (!myPipe->DCCEnable) {
1070 *Tno_bw = LineTime / 4;
1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1079 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1080 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1081 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1082 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1084 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1085 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1086 #ifdef __DML_VBA_DEBUG__
1087 Tsw_oto = Lsw_oto * LineTime;
1091 #ifdef __DML_VBA_DEBUG__
1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1100 if (GPUVMEnable == true)
1101 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1103 Tvm_oto = LineTime / 4.0;
1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1106 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1113 #ifdef __DML_VBA_DEBUG__
1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1125 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1126 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1129 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1130 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1132 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1134 if (prefetch_sw_bytes < dep_bytes)
1135 prefetch_sw_bytes = 2 * dep_bytes;
1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1144 dml_print("DML: LineTime: %f\n", LineTime);
1145 dml_print("DML: VStartup: %d\n", VStartup);
1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1152 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1158 *PrefetchBandwidth = 0;
1159 *DestinationLinesToRequestVMInVBlank = 0;
1160 *DestinationLinesToRequestRowInVBlank = 0;
1161 *VRatioPrefetchY = 0;
1162 *VRatioPrefetchC = 0;
1163 *RequiredPrefetchPixDataBWLuma = 0;
1164 if (dst_y_prefetch_equ > 1) {
1165 double PrefetchBandwidth1;
1166 double PrefetchBandwidth2;
1167 double PrefetchBandwidth3;
1168 double PrefetchBandwidth4;
1170 if (Tpre_rounded - *Tno_bw > 0) {
1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1175 PrefetchBandwidth1 = 0;
1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1186 PrefetchBandwidth2 = 0;
1188 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1193 PrefetchBandwidth3 = 0;
1196 #ifdef __DML_VBA_DEBUG__
1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1209 PrefetchBandwidth4 = 0;
1216 if (PrefetchBandwidth1 > 0) {
1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1227 if (PrefetchBandwidth2 > 0) {
1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1238 if (PrefetchBandwidth3 > 0) {
1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1250 prefetch_bw_equ = PrefetchBandwidth1;
1251 } else if (Case2OK) {
1252 prefetch_bw_equ = PrefetchBandwidth2;
1253 } else if (Case3OK) {
1254 prefetch_bw_equ = PrefetchBandwidth3;
1256 prefetch_bw_equ = PrefetchBandwidth4;
1259 #ifdef __DML_VBA_DEBUG__
1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1266 if (prefetch_bw_equ > 0) {
1267 if (GPUVMEnable == true) {
1268 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1270 Tvm_equ = LineTime / 4;
1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1275 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1277 (LineTime - Tvm_equ) / 2,
1280 Tr0_equ = (LineTime - Tvm_equ) / 2;
1285 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1291 TimeForFetchingMetaPTE = Tvm_oto;
1292 TimeForFetchingRowInVBlank = Tr0_oto;
1293 *PrefetchBandwidth = prefetch_bw_oto;
1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1296 TimeForFetchingMetaPTE = Tvm_equ;
1297 TimeForFetchingRowInVBlank = Tr0_equ;
1298 *PrefetchBandwidth = prefetch_bw_equ;
1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1305 #ifdef __DML_VBA_ALLOW_DELTA__
1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1307 // See note above dated 5/30/2018
1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1314 #ifdef __DML_VBA_DEBUG__
1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1327 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1328 #ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1335 *VRatioPrefetchY = dml_max(
1336 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1337 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1338 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1342 *VRatioPrefetchY = 0;
1344 #ifdef __DML_VBA_DEBUG__
1345 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1352 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1354 #ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1361 *VRatioPrefetchC = dml_max(
1363 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1364 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1368 *VRatioPrefetchC = 0;
1370 #ifdef __DML_VBA_DEBUG__
1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1377 #ifdef __DML_VBA_DEBUG__
1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1385 #ifdef __DML_VBA_DEBUG__
1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1395 *VRatioPrefetchY = 0;
1396 *VRatioPrefetchC = 0;
1397 *RequiredPrefetchPixDataBWLuma = 0;
1398 *RequiredPrefetchPixDataBWChroma = 0;
1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1408 (double) LinesToRequestPrefetchPixelData * LineTime);
1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1412 "DML: Tslack(pre): %fus - time left over in schedule\n",
1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1423 double prefetch_vm_bw;
1424 double prefetch_row_bw;
1426 if (PDEAndMetaPTEBytesFrame == 0) {
1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1429 #ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1436 #ifdef __DML_VBA_DEBUG__
1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1446 prefetch_row_bw = 0;
1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1450 #ifdef __DML_VBA_DEBUG__
1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1457 prefetch_row_bw = 0;
1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1462 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1466 *PrefetchBandwidth = 0;
1467 TimeForFetchingMetaPTE = 0;
1468 TimeForFetchingRowInVBlank = 0;
1469 *DestinationLinesToRequestVMInVBlank = 0;
1470 *DestinationLinesToRequestRowInVBlank = 0;
1471 *DestinationLinesForPrefetch = 0;
1472 LinesToRequestPrefetchPixelData = 0;
1473 *VRatioPrefetchY = 0;
1474 *VRatioPrefetchC = 0;
1475 *RequiredPrefetchPixDataBWLuma = 0;
1476 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Returns four times VCOSpeed divided by the result of
 * dml_floor(4 * VCOSpeed / Clock, 1).
 *
 * Presumably dml_floor() with a granularity of 1 yields a whole divider that
 * is no larger than the exact ratio, which would make the result >= Clock
 * ("round up") - confirm against the helper's definition.
 *
 * No guard is applied here: Clock and the computed divider are used as
 * divisors as-is.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
/*
 * Returns four times VCOSpeed divided by the result of
 * dml_ceil(4 * VCOSpeed / Clock, 1).
 *
 * Presumably dml_ceil() with a granularity of 1 yields a whole divider that
 * is no smaller than the exact ratio, which would make the result <= Clock
 * ("round down") - confirm against the helper's definition.
 *
 * No guard is applied here: Clock and the computed divider are used as
 * divisors as-is.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_ceil(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
// CalculateDCCConfiguration: selects a request type (REQ_256Bytes,
// REQ_128BytesContiguous or REQ_128BytesNonContiguous) separately for the luma
// and chroma planes and reports each choice through the output pointers as a
// (max uncompressed block, max compressed block, independent block) triple.
//
// Inputs that drive the choice:
//   - whether the full-swath byte counts computed below fit in DETBufferSize,
//   - "segment order contiguous" flags derived from BytePerPixelY/C and
//     TilingFormat,
//   - DCCProgrammingAssumesScanDirectionUnknown and ScanOrientation.
//
// The luma outputs are forced to 0 when DCCEnabled is not true; the chroma
// outputs are forced to 0 when DCCEnabled is not true or BytePerPixelC == 0.
//
// NOTE(review): DCCEnabled, yuv420, swath_buf_size and the horz_div_* /
// vert_div_* variables are used in this body but their declarations do not
// appear among the lines below - confirm against the complete definition.
// NOTE(review): SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
// BytePerPixelDETC are not referenced by any line shown below.
1492 static void CalculateDCCConfiguration(
1494 bool DCCProgrammingAssumesScanDirectionUnknown,
1495 enum source_format_class SourcePixelFormat,
1496 unsigned int SurfaceWidthLuma,
1497 unsigned int SurfaceWidthChroma,
1498 unsigned int SurfaceHeightLuma,
1499 unsigned int SurfaceHeightChroma,
1500 double DETBufferSize,
1501 unsigned int RequestHeight256ByteLuma,
1502 unsigned int RequestHeight256ByteChroma,
1503 enum dm_swizzle_mode TilingFormat,
1504 unsigned int BytePerPixelY,
1505 unsigned int BytePerPixelC,
1506 double BytePerPixelDETY,
1507 double BytePerPixelDETC,
1508 enum scan_direction_class ScanOrientation,
// Outputs: written unconditionally near the end of the function.
1509 unsigned int *MaxUncompressedBlockLuma,
1510 unsigned int *MaxUncompressedBlockChroma,
1511 unsigned int *MaxCompressedBlockLuma,
1512 unsigned int *MaxCompressedBlockChroma,
1513 unsigned int *IndependentBlockLuma,
1514 unsigned int *IndependentBlockChroma)
// Viewport limits derived from the swath buffer size (kept as doubles).
1523 double detile_buf_vp_horz_limit;
1524 double detile_buf_vp_vert_limit;
// Fixed viewport limits, the resulting maxima, and the surface dimensions
// after clamping to those maxima.
1526 int MAS_vp_horz_limit;
1527 int MAS_vp_vert_limit;
1528 int max_vp_horz_width;
1529 int max_vp_vert_height;
1530 int eff_surf_width_l;
1531 int eff_surf_width_c;
1532 int eff_surf_height_l;
1533 int eff_surf_height_c;
// Byte counts of one full swath per plane, for the horizontal and vertical
// cases, and the 0/1 flags derived from them.
1535 int full_swath_bytes_horz_wc_l;
1536 int full_swath_bytes_horz_wc_c;
1537 int full_swath_bytes_vert_wc_l;
1538 int full_swath_bytes_vert_wc_c;
1539 int req128_horz_wc_l;
1540 int req128_horz_wc_c;
1541 int req128_vert_wc_l;
1542 int req128_vert_wc_c;
1543 int segment_order_horz_contiguous_luma;
1544 int segment_order_horz_contiguous_chroma;
1545 int segment_order_vert_contiguous_luma;
1546 int segment_order_vert_contiguous_chroma;
// Enumerators of the RequestType used for RequestLuma / RequestChroma.
// NOTE(review): REQ_NA is never assigned or compared in the lines shown.
1549 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1551 RequestType RequestLuma;
1552 RequestType RequestChroma;
// yuv420 is 1 for the 8/10/12-bit 4:2:0 source formats and 0 otherwise.
1554 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
// NOTE(review): the statements controlled by the next four conditions are not
// visible here; presumably they adjust the horz_div_* / vert_div_* values used
// in the limit formulas below - confirm.
1560 if (BytePerPixelY == 1)
1562 if (BytePerPixelC == 1)
1564 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1566 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
// Single-plane case: the swath buffer is half of DETBufferSize minus 2 * 256,
// and the limits use luma terms only.
1569 if (BytePerPixelC == 0) {
1570 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1571 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1572 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
// Two-plane form (presumably the else branch of the test above): 2 * 2 * 256
// is subtracted instead, and each divisor adds a chroma term that is further
// divided by (1 + yuv420).
1574 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1575 detile_buf_vp_horz_limit = (double) swath_buf_size
1576 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1577 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1578 detile_buf_vp_vert_limit = (double) swath_buf_size
1579 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
// dm_420_10 scales both limits by 1.5.
1582 if (SourcePixelFormat == dm_420_10) {
1583 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1584 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
// Subtract 1, then pass through dml_floor with a second argument of 16
// (presumably rounding down to a multiple of 16 - confirm helper semantics).
1587 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1588 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
// Fixed limits: 3840 wide for dm_rgbe_alpha, otherwise 5760; 2880 high when a
// chroma plane exists (BytePerPixelC > 0), otherwise 5760. The usable maximum
// is the smaller of the fixed limit and the detile-buffer limit.
1590 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1591 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1592 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1593 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
// Clamp the luma surface size to the maxima; the chroma size is the luma size
// divided by (1 + yuv420), i.e. halved for 4:2:0 formats.
1594 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1595 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1596 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1597 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
// Full-swath byte counts. The chroma counts are computed only when
// BytePerPixelC > 0; the two zero assignments after that cover the remaining
// case (presumably the else branch).
1599 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1600 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1601 if (BytePerPixelC > 0) {
1602 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1603 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1605 full_swath_bytes_horz_wc_c = 0;
1606 full_swath_bytes_vert_wc_c = 0;
// dm_420_10: scale each count by 2 / 3 before handing it to dml_ceil with a
// second argument of 256. The operands are ints, so "* 2 / 3" is evaluated in
// integer arithmetic before the call.
1609 if (SourcePixelFormat == dm_420_10) {
1610 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1611 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1612 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1613 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
// Horizontal case: decide which planes get req128 = 1.
//   2*luma + 2*chroma fits in DETBufferSize         -> neither
//   luma <  1.5*chroma and 2*luma + chroma fits     -> chroma only
//   luma >= 1.5*chroma and luma + 2*chroma fits     -> luma only
//   the final pair of assignments sets both to 1
// A flag value of 0 is what later maps to REQ_256Bytes.
1616 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 0;
1618 req128_horz_wc_c = 0;
1619 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1620 req128_horz_wc_l = 0;
1621 req128_horz_wc_c = 1;
1622 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1623 req128_horz_wc_l = 1;
1624 req128_horz_wc_c = 0;
1626 req128_horz_wc_l = 1;
1627 req128_horz_wc_c = 1;
// Vertical case: same decision ladder applied to the *_vert_* byte counts.
1630 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 0;
1632 req128_vert_wc_c = 0;
1633 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1634 req128_vert_wc_l = 0;
1635 req128_vert_wc_c = 1;
1636 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1637 req128_vert_wc_l = 1;
1638 req128_vert_wc_c = 0;
1640 req128_vert_wc_l = 1;
1641 req128_vert_wc_c = 1;
// Segment-order flags. Each condition below is followed by an assignment of 0
// and then an assignment of 1 (the 1 is presumably the else branch). The luma
// and chroma tests are identical apart from using BytePerPixelY vs
// BytePerPixelC.
1644 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1645 segment_order_horz_contiguous_luma = 0;
1647 segment_order_horz_contiguous_luma = 1;
1649 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1650 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1651 segment_order_vert_contiguous_luma = 0;
1653 segment_order_vert_contiguous_luma = 1;
1655 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1656 segment_order_horz_contiguous_chroma = 0;
1658 segment_order_horz_contiguous_chroma = 1;
1660 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1661 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1662 segment_order_vert_contiguous_chroma = 0;
1664 segment_order_vert_contiguous_chroma = 1;
// Request type selection, scan direction unknown: both the horizontal and the
// vertical flags are consulted. REQ_256Bytes needs both req128 flags to be 0;
// REQ_128BytesNonContiguous is chosen when a direction that needs 128-byte
// requests has a segment-order flag of 0; REQ_128BytesContiguous is the
// remaining assignment.
1667 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1668 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1669 RequestLuma = REQ_256Bytes;
1670 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1671 RequestLuma = REQ_128BytesNonContiguous;
1673 RequestLuma = REQ_128BytesContiguous;
1675 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1676 RequestChroma = REQ_256Bytes;
1677 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1678 RequestChroma = REQ_128BytesNonContiguous;
1680 RequestChroma = REQ_128BytesContiguous;
// Scan direction known and not dm_vert: only the horizontal flags are used.
1682 } else if (ScanOrientation != dm_vert) {
1683 if (req128_horz_wc_l == 0) {
1684 RequestLuma = REQ_256Bytes;
1685 } else if (segment_order_horz_contiguous_luma == 0) {
1686 RequestLuma = REQ_128BytesNonContiguous;
1688 RequestLuma = REQ_128BytesContiguous;
1690 if (req128_horz_wc_c == 0) {
1691 RequestChroma = REQ_256Bytes;
1692 } else if (segment_order_horz_contiguous_chroma == 0) {
1693 RequestChroma = REQ_128BytesNonContiguous;
1695 RequestChroma = REQ_128BytesContiguous;
// Same selection using only the vertical flags (presumably the remaining
// branch, i.e. ScanOrientation == dm_vert).
1698 if (req128_vert_wc_l == 0) {
1699 RequestLuma = REQ_256Bytes;
1700 } else if (segment_order_vert_contiguous_luma == 0) {
1701 RequestLuma = REQ_128BytesNonContiguous;
1703 RequestLuma = REQ_128BytesContiguous;
1705 if (req128_vert_wc_c == 0) {
1706 RequestChroma = REQ_256Bytes;
1707 } else if (segment_order_vert_contiguous_chroma == 0) {
1708 RequestChroma = REQ_128BytesNonContiguous;
1710 RequestChroma = REQ_128BytesContiguous;
// Translate the luma request type into output values
// (uncompressed / compressed / independent):
//   REQ_256Bytes           -> 256 / 256 / 0
//   REQ_128BytesContiguous -> 256 / 128 / 128
//   last three assignments -> 256 / 64 / 64
1714 if (RequestLuma == REQ_256Bytes) {
1715 *MaxUncompressedBlockLuma = 256;
1716 *MaxCompressedBlockLuma = 256;
1717 *IndependentBlockLuma = 0;
1718 } else if (RequestLuma == REQ_128BytesContiguous) {
1719 *MaxUncompressedBlockLuma = 256;
1720 *MaxCompressedBlockLuma = 128;
1721 *IndependentBlockLuma = 128;
1723 *MaxUncompressedBlockLuma = 256;
1724 *MaxCompressedBlockLuma = 64;
1725 *IndependentBlockLuma = 64;
// Same translation for chroma.
1728 if (RequestChroma == REQ_256Bytes) {
1729 *MaxUncompressedBlockChroma = 256;
1730 *MaxCompressedBlockChroma = 256;
1731 *IndependentBlockChroma = 0;
1732 } else if (RequestChroma == REQ_128BytesContiguous) {
1733 *MaxUncompressedBlockChroma = 256;
1734 *MaxCompressedBlockChroma = 128;
1735 *IndependentBlockChroma = 128;
1737 *MaxUncompressedBlockChroma = 256;
1738 *MaxCompressedBlockChroma = 64;
1739 *IndependentBlockChroma = 64;
// Overrides: the chroma outputs are zeroed when DCC is off or there is no
// chroma plane; the luma outputs are zeroed when DCC is off.
1742 if (DCCEnabled != true || BytePerPixelC == 0) {
1743 *MaxUncompressedBlockChroma = 0;
1744 *MaxCompressedBlockChroma = 0;
1745 *IndependentBlockChroma = 0;
1748 if (DCCEnabled != true) {
1749 *MaxUncompressedBlockLuma = 0;
1750 *MaxCompressedBlockLuma = 0;
1751 *IndependentBlockLuma = 0;
// CalculatePrefetchSourceLines: writes *VInitPreFill and *MaxNumSwath, computes
// a local MaxPartialSwath, and returns
// *MaxNumSwath * SwathHeight + MaxPartialSwath.
//
// Which formulas are used depends on ProgressiveToInterlaceUnitInOPP and on
// mode_lib->vba.IgnoreViewportPositioning. ViewportYStart is only consulted to
// decide whether to emit a warning print.
//
// SwathHeight is used as a divisor and as a modulus without any check here.
1755 static double CalculatePrefetchSourceLines(
1756 struct display_mode_lib *mode_lib,
// NOTE(review): VRatio, vtaps and Interlace are used in the body but their
// declarations are not among the parameters shown here - confirm their types
// against the complete signature.
1760 bool ProgressiveToInterlaceUnitInOPP,
1761 unsigned int SwathHeight,
1762 unsigned int ViewportYStart,
1763 double *VInitPreFill,
1764 unsigned int *MaxNumSwath)
1766 struct vba_vars_st *v = &mode_lib->vba;
1767 unsigned int MaxPartialSwath;
// *VInitPreFill: dml_floor((VRatio + vtaps + 1) / 2, 1) when
// ProgressiveToInterlaceUnitInOPP is set. The second assignment (presumably
// the else branch) additionally adds Interlace * 0.5 * VRatio to the numerator.
1769 if (ProgressiveToInterlaceUnitInOPP)
1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1772 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
// Viewport positioning honoured: the swath count is
// dml_ceil((*VInitPreFill - 1) / SwathHeight, 1) + 1, and the partial swath is
// taken from (*VInitPreFill - 2) modulo SwathHeight, with SwathHeight added
// first in the second form (presumably the *VInitPreFill <= 1.0 case).
// dml_max(1U, ...) then keeps the partial swath at 1 or more.
1774 if (!v->IgnoreViewportPositioning) {
1776 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1778 if (*VInitPreFill > 1.0)
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1781 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1782 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
// Alternative formulas (presumably the IgnoreViewportPositioning branch): warn
// if the caller supplied a non-zero ViewportYStart, use
// dml_ceil(*VInitPreFill / SwathHeight, 1) for the swath count and an offset
// of 1 instead of 2 for the partial swath. No dml_max clamp is applied among
// these lines.
1786 if (ViewportYStart != 0)
1787 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1789 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1791 if (*VInitPreFill > 1.0)
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1794 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
// Debug-only dump of the inputs and results.
1797 #ifdef __DML_VBA_DEBUG__
1798 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1799 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1800 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1801 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1802 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1803 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1804 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1805 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1806 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
// The sum is computed in unsigned int and converted to double on return.
1808 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1811 static unsigned int CalculateVMAndRowBytes(
1812 struct display_mode_lib *mode_lib,
1814 unsigned int BlockHeight256Bytes,
1815 unsigned int BlockWidth256Bytes,
1816 enum source_format_class SourcePixelFormat,
1817 unsigned int SurfaceTiling,
1818 unsigned int BytePerPixel,
1819 enum scan_direction_class ScanDirection,
1820 unsigned int SwathWidth,
1821 unsigned int ViewportHeight,
1824 unsigned int HostVMMaxNonCachedPageTableLevels,
1825 unsigned int GPUVMMinPageSize,
1826 unsigned int HostVMMinPageSize,
1827 unsigned int PTEBufferSizeInRequests,
1829 unsigned int DCCMetaPitch,
1830 unsigned int *MacroTileWidth,
1831 unsigned int *MetaRowByte,
1832 unsigned int *PixelPTEBytesPerRow,
1833 bool *PTEBufferSizeNotExceeded,
1834 int *dpte_row_width_ub,
1835 unsigned int *dpte_row_height,
1836 unsigned int *MetaRequestWidth,
1837 unsigned int *MetaRequestHeight,
1838 unsigned int *meta_row_width,
1839 unsigned int *meta_row_height,
1840 int *vm_group_bytes,
1841 unsigned int *dpte_group_bytes,
1842 unsigned int *PixelPTEReqWidth,
1843 unsigned int *PixelPTEReqHeight,
1844 unsigned int *PTERequestSize,
1845 int *DPDE0BytesFrame,
1846 int *MetaPTEBytesFrame)
1848 struct vba_vars_st *v = &mode_lib->vba;
1849 unsigned int MPDEBytesFrame;
1850 unsigned int DCCMetaSurfaceBytes;
1851 unsigned int MacroTileSizeBytes;
1852 unsigned int MacroTileHeight;
1853 unsigned int ExtraDPDEBytesFrame;
1854 unsigned int PDEAndMetaPTEBytesFrame;
1855 unsigned int PixelPTEReqHeightPTEs = 0;
1856 unsigned int HostVMDynamicLevels = 0;
1857 double FractionOfPTEReturnDrop;
1859 if (GPUVMEnable == true && HostVMEnable == true) {
1860 if (HostVMMinPageSize < 2048) {
1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1865 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1869 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1870 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1871 if (ScanDirection != dm_vert) {
1872 *meta_row_height = *MetaRequestHeight;
1873 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1876 *meta_row_height = *MetaRequestWidth;
1877 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1881 if (GPUVMEnable == true) {
1882 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1885 *MetaPTEBytesFrame = 0;
1889 if (DCCEnable != true) {
1890 *MetaPTEBytesFrame = 0;
1895 if (SurfaceTiling == dm_sw_linear) {
1896 MacroTileSizeBytes = 256;
1897 MacroTileHeight = BlockHeight256Bytes;
1899 MacroTileSizeBytes = 65536;
1900 MacroTileHeight = 16 * BlockHeight256Bytes;
1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1905 if (ScanDirection != dm_vert) {
1906 *DPDE0BytesFrame = 64
1908 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1912 *DPDE0BytesFrame = 64
1914 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1920 *DPDE0BytesFrame = 0;
1921 ExtraDPDEBytesFrame = 0;
1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1926 #ifdef __DML_VBA_DEBUG__
1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1934 if (HostVMEnable == true) {
1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1937 #ifdef __DML_VBA_DEBUG__
1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1941 if (SurfaceTiling == dm_sw_linear) {
1942 PixelPTEReqHeightPTEs = 1;
1943 *PixelPTEReqHeight = 1;
1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1945 *PTERequestSize = 64;
1946 FractionOfPTEReturnDrop = 0;
1947 } else if (MacroTileSizeBytes == 4096) {
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 if (ScanDirection != dm_vert)
1953 FractionOfPTEReturnDrop = 0;
1955 FractionOfPTEReturnDrop = 7 / 8;
1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1957 PixelPTEReqHeightPTEs = 16;
1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1960 *PTERequestSize = 128;
1961 FractionOfPTEReturnDrop = 0;
1963 PixelPTEReqHeightPTEs = 1;
1964 *PixelPTEReqHeight = MacroTileHeight;
1965 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1966 *PTERequestSize = 64;
1967 FractionOfPTEReturnDrop = 0;
1970 if (SurfaceTiling == dm_sw_linear) {
1971 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1972 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1974 } else if (ScanDirection != dm_vert) {
1975 *dpte_row_height = *PixelPTEReqHeight;
1976 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1979 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1980 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1985 *PTEBufferSizeNotExceeded = true;
1987 *PTEBufferSizeNotExceeded = false;
1990 if (GPUVMEnable != true) {
1991 *PixelPTEBytesPerRow = 0;
1992 *PTEBufferSizeNotExceeded = true;
1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1997 if (HostVMEnable == true) {
1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2001 if (HostVMEnable == true) {
2002 *vm_group_bytes = 512;
2003 *dpte_group_bytes = 512;
2004 } else if (GPUVMEnable == true) {
2005 *vm_group_bytes = 2048;
2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2007 *dpte_group_bytes = 512;
2009 *dpte_group_bytes = 2048;
2012 *vm_group_bytes = 0;
2013 *dpte_group_bytes = 0;
2015 return PDEAndMetaPTEBytesFrame;
2018 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2020 struct vba_vars_st *v = &mode_lib->vba;
2022 double HostVMInefficiencyFactor = 1.0;
2023 bool NoChromaPlanes = true;
2025 double VMDataOnlyReturnBW;
2026 double MaxTotalRDBandwidth = 0;
2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2029 v->WritebackDISPCLK = 0.0;
2030 v->DISPCLKWithRamping = 0;
2031 v->DISPCLKWithoutRamping = 0;
2032 v->GlobalDPPCLK = 0.0;
2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2036 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2037 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2040 if (v->HostVMEnable != true) {
2041 v->ReturnBW = dml_min(
2042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2045 v->ReturnBW = dml_min(
2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2050 /* End DAL custom code */
2052 // DISPCLK and DPPCLK Calculation
2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2055 if (v->WritebackEnable[k]) {
2056 v->WritebackDISPCLK = dml_max(
2057 v->WritebackDISPCLK,
2058 dml314_CalculateWriteBackDISPCLK(
2059 v->WritebackPixelFormat[k],
2061 v->WritebackHRatio[k],
2062 v->WritebackVRatio[k],
2063 v->WritebackHTaps[k],
2064 v->WritebackVTaps[k],
2065 v->WritebackSourceWidth[k],
2066 v->WritebackDestinationWidth[k],
2068 v->WritebackLineBufferSize));
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 if (v->HRatio[k] > 1) {
2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2075 v->MaxDCHUBToPSCLThroughput,
2076 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2084 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2095 if (v->HRatioChroma[k] > 1) {
2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2097 v->MaxDCHUBToPSCLThroughput,
2098 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2104 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2105 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2112 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2117 if (v->BlendingAndTiming[k] != k)
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2120 v->DISPCLKWithRamping = dml_max(
2121 v->DISPCLKWithRamping,
2122 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2123 * (1 + v->DISPCLKRampingMargin / 100));
2124 v->DISPCLKWithoutRamping = dml_max(
2125 v->DISPCLKWithoutRamping,
2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2128 v->DISPCLKWithRamping = dml_max(
2129 v->DISPCLKWithRamping,
2130 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2131 * (1 + v->DISPCLKRampingMargin / 100));
2132 v->DISPCLKWithoutRamping = dml_max(
2133 v->DISPCLKWithoutRamping,
2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2136 v->DISPCLKWithRamping = dml_max(
2137 v->DISPCLKWithRamping,
2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2139 v->DISPCLKWithoutRamping = dml_max(
2140 v->DISPCLKWithoutRamping,
2141 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2145 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2146 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2152 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2153 v->DISPCLKDPPCLKVCOSpeed);
2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2161 v->DISPCLK = v->DISPCLK_calculated;
2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2166 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2178 // Urgent and B P-State/DRAM Clock Change Watermark
2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2180 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2183 CalculateBytePerPixelAnd256BBlockSizes(
2184 v->SourcePixelFormat[k],
2185 v->SurfaceTiling[k],
2186 &v->BytePerPixelY[k],
2187 &v->BytePerPixelC[k],
2188 &v->BytePerPixelDETY[k],
2189 &v->BytePerPixelDETC[k],
2190 &v->BlockHeight256BytesY[k],
2191 &v->BlockHeight256BytesC[k],
2192 &v->BlockWidth256BytesY[k],
2193 &v->BlockWidth256BytesC[k]);
2196 CalculateSwathWidth(
2198 v->NumberOfActivePlanes,
2199 v->SourcePixelFormat,
2207 v->ODMCombineEnabled,
2210 v->BlockHeight256BytesY,
2211 v->BlockHeight256BytesC,
2212 v->BlockWidth256BytesY,
2213 v->BlockWidth256BytesC,
2214 v->BlendingAndTiming,
2218 v->SwathWidthSingleDPPY,
2219 v->SwathWidthSingleDPPC,
2224 v->swath_width_luma_ub,
2225 v->swath_width_chroma_ub);
2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2231 * v->VRatioChroma[k];
2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2235 // DCFCLK Deep Sleep
2236 CalculateDCFCLKDeepSleep(
2238 v->NumberOfActivePlanes,
2249 v->PSCL_THROUGHPUT_LUMA,
2250 v->PSCL_THROUGHPUT_CHROMA,
2252 v->ReadBandwidthPlaneLuma,
2253 v->ReadBandwidthPlaneChroma,
2255 &v->DCFCLKDeepSleep);
2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2260 v->DSCCLK_calculated[k] = 0.0;
2262 if (v->OutputFormat[k] == dm_420)
2263 v->DSCFormatFactor = 2;
2264 else if (v->OutputFormat[k] == dm_444)
2265 v->DSCFormatFactor = 1;
2266 else if (v->OutputFormat[k] == dm_n422)
2267 v->DSCFormatFactor = 2;
2269 v->DSCFormatFactor = 1;
2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2284 double BPP = v->OutputBpp[k];
2286 if (v->DSCEnabled[k] && BPP != 0) {
2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2288 v->DSCDelay[k] = dscceComputeDelay(
2289 v->DSCInputBitPerComponent[k],
2291 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2292 v->NumberOfDSCSlices[k],
2294 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2297 * (dscceComputeDelay(
2298 v->DSCInputBitPerComponent[k],
2300 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2301 v->NumberOfDSCSlices[k] / 2.0,
2303 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2306 * (dscceComputeDelay(
2307 v->DSCInputBitPerComponent[k],
2309 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2310 v->NumberOfDSCSlices[k] / 4.0,
2312 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2314 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
2315 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2321 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2322 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2323 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2324 v->DSCDelay[k] = v->DSCDelay[j];
2327 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2328 unsigned int PDEAndMetaPTEBytesFrameY;
2329 unsigned int PixelPTEBytesPerRowY;
2330 unsigned int MetaRowByteY;
2331 unsigned int MetaRowByteC;
2332 unsigned int PDEAndMetaPTEBytesFrameC;
2333 unsigned int PixelPTEBytesPerRowC;
2334 bool PTEBufferSizeNotExceededY;
2335 bool PTEBufferSizeNotExceededC;
2337 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2338 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2339 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2340 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2341 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2343 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2344 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2347 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2350 v->BlockHeight256BytesC[k],
2351 v->BlockWidth256BytesC[k],
2352 v->SourcePixelFormat[k],
2353 v->SurfaceTiling[k],
2354 v->BytePerPixelC[k],
2357 v->ViewportHeightChroma[k],
2360 v->HostVMMaxNonCachedPageTableLevels,
2361 v->GPUVMMinPageSize,
2362 v->HostVMMinPageSize,
2363 v->PTEBufferSizeInRequestsForChroma,
2365 v->DCCMetaPitchC[k],
2366 &v->MacroTileWidthC[k],
2368 &PixelPTEBytesPerRowC,
2369 &PTEBufferSizeNotExceededC,
2370 &v->dpte_row_width_chroma_ub[k],
2371 &v->dpte_row_height_chroma[k],
2372 &v->meta_req_width_chroma[k],
2373 &v->meta_req_height_chroma[k],
2374 &v->meta_row_width_chroma[k],
2375 &v->meta_row_height_chroma[k],
2378 &v->PixelPTEReqWidthC[k],
2379 &v->PixelPTEReqHeightC[k],
2380 &v->PTERequestSizeC[k],
2381 &v->dpde0_bytes_per_frame_ub_c[k],
2382 &v->meta_pte_bytes_per_frame_ub_c[k]);
2384 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2389 v->ProgressiveToInterlaceUnitInOPP,
2391 v->ViewportYStartC[k],
2392 &v->VInitPreFillC[k],
2393 &v->MaxNumSwathC[k]);
2395 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2396 v->PTEBufferSizeInRequestsForChroma = 0;
2397 PixelPTEBytesPerRowC = 0;
2398 PDEAndMetaPTEBytesFrameC = 0;
2400 v->MaxNumSwathC[k] = 0;
2401 v->PrefetchSourceLinesC[k] = 0;
2404 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2407 v->BlockHeight256BytesY[k],
2408 v->BlockWidth256BytesY[k],
2409 v->SourcePixelFormat[k],
2410 v->SurfaceTiling[k],
2411 v->BytePerPixelY[k],
2414 v->ViewportHeight[k],
2417 v->HostVMMaxNonCachedPageTableLevels,
2418 v->GPUVMMinPageSize,
2419 v->HostVMMinPageSize,
2420 v->PTEBufferSizeInRequestsForLuma,
2422 v->DCCMetaPitchY[k],
2423 &v->MacroTileWidthY[k],
2425 &PixelPTEBytesPerRowY,
2426 &PTEBufferSizeNotExceededY,
2427 &v->dpte_row_width_luma_ub[k],
2428 &v->dpte_row_height[k],
2429 &v->meta_req_width[k],
2430 &v->meta_req_height[k],
2431 &v->meta_row_width[k],
2432 &v->meta_row_height[k],
2433 &v->vm_group_bytes[k],
2434 &v->dpte_group_bytes[k],
2435 &v->PixelPTEReqWidthY[k],
2436 &v->PixelPTEReqHeightY[k],
2437 &v->PTERequestSizeY[k],
2438 &v->dpde0_bytes_per_frame_ub_l[k],
2439 &v->meta_pte_bytes_per_frame_ub_l[k]);
2441 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2446 v->ProgressiveToInterlaceUnitInOPP,
2448 v->ViewportYStartY[k],
2449 &v->VInitPreFillY[k],
2450 &v->MaxNumSwathY[k]);
2451 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2452 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2453 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2455 CalculateRowBandwidth(
2457 v->SourcePixelFormat[k],
2461 v->HTotal[k] / v->PixelClock[k],
2464 v->meta_row_height[k],
2465 v->meta_row_height_chroma[k],
2466 PixelPTEBytesPerRowY,
2467 PixelPTEBytesPerRowC,
2468 v->dpte_row_height[k],
2469 v->dpte_row_height_chroma[k],
2471 &v->dpte_row_bw[k]);
2474 v->TotalDCCActiveDPP = 0;
2475 v->TotalActiveDPP = 0;
2476 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2477 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2478 if (v->DCCEnable[k])
2479 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2480 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2481 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2482 NoChromaPlanes = false;
2485 ReorderBytes = v->NumberOfChannels
2487 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2488 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2489 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2491 VMDataOnlyReturnBW = dml_min(
2492 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2493 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2494 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2495 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2497 #ifdef __DML_VBA_DEBUG__
2498 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2499 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2500 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2501 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2502 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2503 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2504 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2505 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2506 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2507 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2508 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2511 if (v->GPUVMEnable && v->HostVMEnable)
2512 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2514 v->UrgentExtraLatency = CalculateExtraLatency(
2515 v->RoundTripPingLatencyCycles,
2519 v->PixelChunkSizeInKByte,
2520 v->TotalDCCActiveDPP,
2525 v->NumberOfActivePlanes,
2527 v->dpte_group_bytes,
2528 HostVMInefficiencyFactor,
2529 v->HostVMMinPageSize,
2530 v->HostVMMaxNonCachedPageTableLevels);
2532 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2534 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2535 if (v->BlendingAndTiming[k] == k) {
2536 if (v->WritebackEnable[k] == true) {
2537 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2538 + CalculateWriteBackDelay(
2539 v->WritebackPixelFormat[k],
2540 v->WritebackHRatio[k],
2541 v->WritebackVRatio[k],
2542 v->WritebackVTaps[k],
2543 v->WritebackDestinationWidth[k],
2544 v->WritebackDestinationHeight[k],
2545 v->WritebackSourceHeight[k],
2546 v->HTotal[k]) / v->DISPCLK;
2548 v->WritebackDelay[v->VoltageLevel][k] = 0;
2549 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2550 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2551 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2552 v->WritebackDelay[v->VoltageLevel][k],
2554 + CalculateWriteBackDelay(
2555 v->WritebackPixelFormat[j],
2556 v->WritebackHRatio[j],
2557 v->WritebackVRatio[j],
2558 v->WritebackVTaps[j],
2559 v->WritebackDestinationWidth[j],
2560 v->WritebackDestinationHeight[j],
2561 v->WritebackSourceHeight[j],
2562 v->HTotal[k]) / v->DISPCLK);
2568 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2569 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2570 if (v->BlendingAndTiming[k] == j)
2571 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2574 v->MaxVStartupLines[k] =
2575 CalculateMaxVStartup(
2581 v->ProgressiveToInterlaceUnitInOPP,
2583 v->ip.VBlankNomDefaultUS,
2584 v->WritebackDelay[v->VoltageLevel][k]);
2586 #ifdef __DML_VBA_DEBUG__
2587 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2588 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2589 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2593 v->MaximumMaxVStartupLines = 0;
2594 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2595 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2598 // We don't really care to iterate between the various prefetch modes
2599 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2601 v->UrgentLatency = CalculateUrgentLatency(
2602 v->UrgentLatencyPixelDataOnly,
2603 v->UrgentLatencyPixelMixedWithVMData,
2604 v->UrgentLatencyVMDataOnly,
2605 v->DoUrgentLatencyAdjustment,
2606 v->UrgentLatencyAdjustmentFabricClockComponent,
2607 v->UrgentLatencyAdjustmentFabricClockReference,
2610 v->FractionOfUrgentBandwidth = 0.0;
2611 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2613 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2616 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2617 bool DestinationLineTimesForPrefetchLessThan2 = false;
2618 bool VRatioPrefetchMoreThan4 = false;
2619 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2621 MaxTotalRDBandwidth = 0;
2623 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2625 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2628 myPipe.DPPCLK = v->DPPCLK[k];
2629 myPipe.DISPCLK = v->DISPCLK;
2630 myPipe.PixelClock = v->PixelClock[k];
2631 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2632 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2633 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2634 myPipe.VRatio = v->VRatio[k];
2635 myPipe.VRatioChroma = v->VRatioChroma[k];
2636 myPipe.SourceScan = v->SourceScan[k];
2637 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2638 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2639 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2640 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2641 myPipe.InterlaceEnable = v->Interlace[k];
2642 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2643 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2644 myPipe.HTotal = v->HTotal[k];
2645 myPipe.DCCEnable = v->DCCEnable[k];
2646 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2647 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2648 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2649 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2650 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2651 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2652 v->ErrorResult[k] = CalculatePrefetchSchedule(
2654 HostVMInefficiencyFactor,
2657 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2659 v->DPPCLKDelaySCLLBOnly,
2660 v->DPPCLKDelayCNVCCursor,
2661 v->DISPCLKDelaySubtotal,
2662 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2664 v->MaxInterDCNTileRepeaters,
2665 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2666 v->MaxVStartupLines[k],
2667 v->GPUVMMaxPageTableLevels,
2670 v->HostVMMaxNonCachedPageTableLevels,
2671 v->HostVMMinPageSize,
2672 v->DynamicMetadataEnable[k],
2673 v->DynamicMetadataVMEnabled,
2674 v->DynamicMetadataLinesBeforeActiveRequired[k],
2675 v->DynamicMetadataTransmittedBytes[k],
2677 v->UrgentExtraLatency,
2679 v->PDEAndMetaPTEBytesFrame[k],
2681 v->PixelPTEBytesPerRow[k],
2682 v->PrefetchSourceLinesY[k],
2684 v->VInitPreFillY[k],
2686 v->PrefetchSourceLinesC[k],
2688 v->VInitPreFillC[k],
2690 v->swath_width_luma_ub[k],
2691 v->swath_width_chroma_ub[k],
2695 &v->DSTXAfterScaler[k],
2696 &v->DSTYAfterScaler[k],
2697 &v->DestinationLinesForPrefetch[k],
2698 &v->PrefetchBandwidth[k],
2699 &v->DestinationLinesToRequestVMInVBlank[k],
2700 &v->DestinationLinesToRequestRowInVBlank[k],
2701 &v->VRatioPrefetchY[k],
2702 &v->VRatioPrefetchC[k],
2703 &v->RequiredPrefetchPixDataBWLuma[k],
2704 &v->RequiredPrefetchPixDataBWChroma[k],
2705 &v->NotEnoughTimeForDynamicMetadata[k],
2707 &v->prefetch_vmrow_bw[k],
2711 &v->VUpdateOffsetPix[k],
2712 &v->VUpdateWidthPix[k],
2713 &v->VReadyOffsetPix[k]);
2715 #ifdef __DML_VBA_DEBUG__
2716 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2718 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2721 v->NoEnoughUrgentLatencyHiding = false;
2722 v->NoEnoughUrgentLatencyHidingPre = false;
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2726 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2727 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2728 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2730 CalculateUrgentBurstFactor(
2731 v->swath_width_luma_ub[k],
2732 v->swath_width_chroma_ub[k],
2735 v->HTotal[k] / v->PixelClock[k],
2737 v->CursorBufferSize,
2738 v->CursorWidth[k][0],
2742 v->BytePerPixelDETY[k],
2743 v->BytePerPixelDETC[k],
2744 v->DETBufferSizeY[k],
2745 v->DETBufferSizeC[k],
2746 &v->UrgBurstFactorCursor[k],
2747 &v->UrgBurstFactorLuma[k],
2748 &v->UrgBurstFactorChroma[k],
2749 &v->NoUrgentLatencyHiding[k]);
2751 CalculateUrgentBurstFactor(
2752 v->swath_width_luma_ub[k],
2753 v->swath_width_chroma_ub[k],
2756 v->HTotal[k] / v->PixelClock[k],
2758 v->CursorBufferSize,
2759 v->CursorWidth[k][0],
2761 v->VRatioPrefetchY[k],
2762 v->VRatioPrefetchC[k],
2763 v->BytePerPixelDETY[k],
2764 v->BytePerPixelDETC[k],
2765 v->DETBufferSizeY[k],
2766 v->DETBufferSizeC[k],
2767 &v->UrgBurstFactorCursorPre[k],
2768 &v->UrgBurstFactorLumaPre[k],
2769 &v->UrgBurstFactorChromaPre[k],
2770 &v->NoUrgentLatencyHidingPre[k]);
2772 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2774 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2775 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2776 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2777 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2778 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2780 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2781 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2782 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2784 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2786 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2787 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2788 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2789 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2790 + v->cursor_bw_pre[k]);
2792 #ifdef __DML_VBA_DEBUG__
2793 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2795 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2796 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2797 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2799 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2800 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2802 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2804 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2805 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2806 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2807 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2809 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2810 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2811 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2814 if (v->DestinationLinesForPrefetch[k] < 2)
2815 DestinationLineTimesForPrefetchLessThan2 = true;
2817 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2818 VRatioPrefetchMoreThan4 = true;
2820 if (v->NoUrgentLatencyHiding[k] == true)
2821 v->NoEnoughUrgentLatencyHiding = true;
2823 if (v->NoUrgentLatencyHidingPre[k] == true)
2824 v->NoEnoughUrgentLatencyHidingPre = true;
2827 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2829 #ifdef __DML_VBA_DEBUG__
2830 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2831 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2832 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2835 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2836 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2837 v->PrefetchModeSupported = true;
2839 v->PrefetchModeSupported = false;
2840 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2841 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2842 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2843 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2847 // This error result check is done after the PrefetchModeSupported check, so we will
2848 // still try to calculate the flip schedule even if the prefetch mode is not supported
2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2850 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2851 v->PrefetchModeSupported = false;
2852 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2856 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2857 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2859 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2861 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2862 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2863 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2865 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2866 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2867 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2870 v->TotImmediateFlipBytes = 0;
2871 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2872 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2873 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2875 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2876 CalculateFlipSchedule(
2879 HostVMInefficiencyFactor,
2880 v->UrgentExtraLatency,
2882 v->PDEAndMetaPTEBytesFrame[k],
2884 v->PixelPTEBytesPerRow[k]);
2887 v->total_dcn_read_bw_with_flip = 0.0;
2888 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2889 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2890 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2892 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2893 v->DPPPerPlane[k] * v->final_flip_bw[k]
2894 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2895 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2896 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2898 * (v->final_flip_bw[k]
2899 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2900 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2901 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2902 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2904 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2905 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2906 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2908 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2909 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2911 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2913 v->ImmediateFlipSupported = true;
2914 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2915 #ifdef __DML_VBA_DEBUG__
2916 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2918 v->ImmediateFlipSupported = false;
2919 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2921 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2922 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2923 #ifdef __DML_VBA_DEBUG__
2924 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2926 v->ImmediateFlipSupported = false;
2930 v->ImmediateFlipSupported = false;
2933 v->PrefetchAndImmediateFlipSupported =
2934 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2935 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2936 v->ImmediateFlipSupported)) ? true : false;
2937 #ifdef __DML_VBA_DEBUG__
2938 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2939 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2940 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2941 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2942 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2943 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2945 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2947 v->VStartupLines = v->VStartupLines + 1;
2948 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2949 ASSERT(v->PrefetchAndImmediateFlipSupported);
2951 // Unbounded Request Enabled
2952 CalculateUnboundedRequestAndCompressedBufferSize(
2953 v->DETBufferSizeInKByte[0],
2954 v->ConfigReturnBufferSizeInKByte,
2955 v->UseUnboundedRequesting,
2959 v->CompressedBufferSegmentSizeInkByte,
2961 &v->UnboundedRequestEnabled,
2962 &v->CompressedBufferSizeInkByte);
2964 //Watermarks and NB P-State/DRAM Clock Change Support
2966 enum clock_change_support DRAMClockChangeSupport; // dummy
2968 CalculateWatermarksAndDRAMSpeedChangeSupport(
2974 v->UrgentExtraLatency,
2984 v->BytePerPixelDETY,
2985 v->BytePerPixelDETC,
2986 v->UnboundedRequestEnabled,
2987 v->CompressedBufferSizeInkByte,
2988 &DRAMClockChangeSupport,
2989 &v->StutterExitWatermark,
2990 &v->StutterEnterPlusExitWatermark,
2991 &v->Z8StutterExitWatermark,
2992 &v->Z8StutterEnterPlusExitWatermark);
2994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2995 if (v->WritebackEnable[k] == true) {
2996 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2998 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3000 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3005 //Display Pipeline Delivery Time in Prefetch, Groups
3006 CalculatePixelDeliveryTimes(
3007 v->NumberOfActivePlanes,
3012 v->swath_width_luma_ub,
3013 v->swath_width_chroma_ub,
3018 v->PSCL_THROUGHPUT_LUMA,
3019 v->PSCL_THROUGHPUT_CHROMA,
3026 v->BlockWidth256BytesY,
3027 v->BlockHeight256BytesY,
3028 v->BlockWidth256BytesC,
3029 v->BlockHeight256BytesC,
3030 v->DisplayPipeLineDeliveryTimeLuma,
3031 v->DisplayPipeLineDeliveryTimeChroma,
3032 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3033 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3034 v->DisplayPipeRequestDeliveryTimeLuma,
3035 v->DisplayPipeRequestDeliveryTimeChroma,
3036 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3037 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3038 v->CursorRequestDeliveryTime,
3039 v->CursorRequestDeliveryTimePrefetch);
3041 CalculateMetaAndPTETimes(
3042 v->NumberOfActivePlanes,
3045 v->MinMetaChunkSizeBytes,
3049 v->DestinationLinesToRequestRowInVBlank,
3050 v->DestinationLinesToRequestRowInImmediateFlip,
3057 v->dpte_row_height_chroma,
3059 v->meta_row_width_chroma,
3061 v->meta_row_height_chroma,
3063 v->meta_req_width_chroma,
3065 v->meta_req_height_chroma,
3066 v->dpte_group_bytes,
3069 v->PixelPTEReqWidthY,
3070 v->PixelPTEReqHeightY,
3071 v->PixelPTEReqWidthC,
3072 v->PixelPTEReqHeightC,
3073 v->dpte_row_width_luma_ub,
3074 v->dpte_row_width_chroma_ub,
3075 v->DST_Y_PER_PTE_ROW_NOM_L,
3076 v->DST_Y_PER_PTE_ROW_NOM_C,
3077 v->DST_Y_PER_META_ROW_NOM_L,
3078 v->DST_Y_PER_META_ROW_NOM_C,
3079 v->TimePerMetaChunkNominal,
3080 v->TimePerChromaMetaChunkNominal,
3081 v->TimePerMetaChunkVBlank,
3082 v->TimePerChromaMetaChunkVBlank,
3083 v->TimePerMetaChunkFlip,
3084 v->TimePerChromaMetaChunkFlip,
3085 v->time_per_pte_group_nom_luma,
3086 v->time_per_pte_group_vblank_luma,
3087 v->time_per_pte_group_flip_luma,
3088 v->time_per_pte_group_nom_chroma,
3089 v->time_per_pte_group_vblank_chroma,
3090 v->time_per_pte_group_flip_chroma);
3092 CalculateVMGroupAndRequestTimes(
3093 v->NumberOfActivePlanes,
3095 v->GPUVMMaxPageTableLevels,
3098 v->DestinationLinesToRequestVMInVBlank,
3099 v->DestinationLinesToRequestVMInImmediateFlip,
3102 v->dpte_row_width_luma_ub,
3103 v->dpte_row_width_chroma_ub,
3105 v->dpde0_bytes_per_frame_ub_l,
3106 v->dpde0_bytes_per_frame_ub_c,
3107 v->meta_pte_bytes_per_frame_ub_l,
3108 v->meta_pte_bytes_per_frame_ub_c,
3109 v->TimePerVMGroupVBlank,
3110 v->TimePerVMGroupFlip,
3111 v->TimePerVMRequestVBlank,
3112 v->TimePerVMRequestFlip);
3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3116 if (PrefetchMode == 0) {
3117 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3118 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3119 v->MinTTUVBlank[k] = dml_max(
3120 v->DRAMClockChangeWatermark,
3121 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3122 } else if (PrefetchMode == 1) {
3123 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3124 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3125 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3127 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3128 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3129 v->MinTTUVBlank[k] = v->UrgentWatermark;
3131 if (!v->DynamicMetadataEnable[k])
3132 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3135 // DCC Configuration
3137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3138 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3139 v->SourcePixelFormat[k],
3140 v->SurfaceWidthY[k],
3141 v->SurfaceWidthC[k],
3142 v->SurfaceHeightY[k],
3143 v->SurfaceHeightC[k],
3144 v->DETBufferSizeInKByte[0] * 1024,
3145 v->BlockHeight256BytesY[k],
3146 v->BlockHeight256BytesC[k],
3147 v->SurfaceTiling[k],
3148 v->BytePerPixelY[k],
3149 v->BytePerPixelC[k],
3150 v->BytePerPixelDETY[k],
3151 v->BytePerPixelDETC[k],
3153 &v->DCCYMaxUncompressedBlock[k],
3154 &v->DCCCMaxUncompressedBlock[k],
3155 &v->DCCYMaxCompressedBlock[k],
3156 &v->DCCCMaxCompressedBlock[k],
3157 &v->DCCYIndependentBlock[k],
3158 &v->DCCCIndependentBlock[k]);
3161 // VStartup Adjustment
3162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3163 bool isInterlaceTiming;
3164 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3165 #ifdef __DML_VBA_DEBUG__
3166 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3169 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3171 #ifdef __DML_VBA_DEBUG__
3172 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3173 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3174 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3175 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3178 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3179 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3180 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3183 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3184 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3185 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3186 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3188 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3190 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3191 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3192 <= (isInterlaceTiming ?
3193 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3194 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3195 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3197 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3199 #ifdef __DML_VBA_DEBUG__
3200 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3201 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3202 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3203 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3204 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3205 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3206 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3207 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3208 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3209 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3210 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3215 //Maximum Bandwidth Used
3216 double TotalWRBandwidth = 0;
3217 double MaxPerPlaneVActiveWRBandwidth = 0;
3218 double WRBandwidth = 0;
3220 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3221 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3222 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3223 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3224 } else if (v->WritebackEnable[k] == true) {
3225 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3226 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3228 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3229 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3232 v->TotalDataReadBandwidth = 0;
3233 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3234 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3237 // Stutter Efficiency
3238 CalculateStutterEfficiency(
3240 v->CompressedBufferSizeInkByte,
3241 v->UnboundedRequestEnabled,
3242 v->ConfigReturnBufferSizeInKByte,
3243 v->MetaFIFOSizeInKEntries,
3244 v->ZeroSizeBufferEntries,
3245 v->NumberOfActivePlanes,
3246 v->ROBBufferSizeInKByte,
3247 v->TotalDataReadBandwidth,
3250 v->COMPBUF_RESERVED_SPACE_64B,
3251 v->COMPBUF_RESERVED_SPACE_ZS,
3254 v->SynchronizedVBlank,
3255 v->StutterEnterPlusExitWatermark,
3256 v->Z8StutterEnterPlusExitWatermark,
3257 v->ProgressiveToInterlaceUnitInOPP,
3263 v->BytePerPixelDETY,
3269 v->DCCFractionOfZeroSizeRequestsLuma,
3270 v->DCCFractionOfZeroSizeRequestsChroma,
3276 v->BlockHeight256BytesY,
3277 v->BlockWidth256BytesY,
3278 v->BlockHeight256BytesC,
3279 v->BlockWidth256BytesC,
3280 v->DCCYMaxUncompressedBlock,
3281 v->DCCCMaxUncompressedBlock,
3285 v->ReadBandwidthPlaneLuma,
3286 v->ReadBandwidthPlaneChroma,
3289 &v->StutterEfficiencyNotIncludingVBlank,
3290 &v->StutterEfficiency,
3291 &v->NumberOfStutterBurstsPerFrame,
3292 &v->Z8StutterEfficiencyNotIncludingVBlank,
3293 &v->Z8StutterEfficiency,
3294 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - look up byte-per-pixel and 256-byte block sizes
 * for every active plane, then run one swath/DET configuration pass over all
 * planes.
 *
 * NOTE(review): several short lines of this function (braces, the declaration
 * of k and a number of call arguments) are absent from the text below, so the
 * complete argument lists of both calls cannot be read from here. Restore
 * them from the reference source before building.
 */
3298 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3300 struct vba_vars_st *v = &mode_lib->vba;
3301 // Display Pipe Configuration
// Per-plane scratch arrays, one slot per DPP.
3302 double BytePerPixDETY[DC__NUM_DPP__MAX];
3303 double BytePerPixDETC[DC__NUM_DPP__MAX];
3304 int BytePerPixY[DC__NUM_DPP__MAX];
3305 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these are int arrays, but their addresses are handed to
// CalculateBytePerPixelAnd256BBlockSizes, which declares unsigned int *
// parameters for them - confirm the signedness mismatch is acceptable.
3306 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3307 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3308 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3309 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// Presumably sinks for outputs this function does not consume - the lines
// that would pass dummy1..dummy7 are not visible here; confirm.
3310 double dummy1[DC__NUM_DPP__MAX];
3311 double dummy2[DC__NUM_DPP__MAX];
3312 double dummy3[DC__NUM_DPP__MAX];
3313 double dummy4[DC__NUM_DPP__MAX];
3314 int dummy5[DC__NUM_DPP__MAX];
3315 int dummy6[DC__NUM_DPP__MAX];
3316 bool dummy7[DC__NUM_DPP__MAX];
3317 bool dummysinglestring;
// Step 1: one lookup per active plane, keyed by that plane's pixel format and
// tiling; the block dimensions land in the Read256BytesBlock* scratch arrays.
3321 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3323 CalculateBytePerPixelAnd256BBlockSizes(
3324 v->SourcePixelFormat[k],
3325 v->SurfaceTiling[k],
3330 &Read256BytesBlockHeightY[k],
3331 &Read256BytesBlockHeightC[k],
3332 &Read256BytesBlockWidthY[k],
3333 &Read256BytesBlockWidthC[k]);
// Step 2: a single call covering all planes; it receives whole arrays
// (including the block dimensions computed above) rather than one element.
3336 CalculateSwathAndDETConfiguration(
3338 v->NumberOfActivePlanes,
3339 v->DETBufferSizeInKByte[0],
3343 v->SourcePixelFormat,
3351 Read256BytesBlockHeightY,
3352 Read256BytesBlockHeightC,
3353 Read256BytesBlockWidthY,
3354 Read256BytesBlockWidthC,
3355 v->ODMCombineEnabled,
3356 v->BlendingAndTiming,
3374 &dummysinglestring);
3377 static bool CalculateBytePerPixelAnd256BBlockSizes(
3378 enum source_format_class SourcePixelFormat,
3379 enum dm_swizzle_mode SurfaceTiling,
3380 unsigned int *BytePerPixelY,
3381 unsigned int *BytePerPixelC,
3382 double *BytePerPixelDETY,
3383 double *BytePerPixelDETC,
3384 unsigned int *BlockHeight256BytesY,
3385 unsigned int *BlockHeight256BytesC,
3386 unsigned int *BlockWidth256BytesY,
3387 unsigned int *BlockWidth256BytesC)
3389 if (SourcePixelFormat == dm_444_64) {
3390 *BytePerPixelDETY = 8;
3391 *BytePerPixelDETC = 0;
3394 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3395 *BytePerPixelDETY = 4;
3396 *BytePerPixelDETC = 0;
3399 } else if (SourcePixelFormat == dm_444_16) {
3400 *BytePerPixelDETY = 2;
3401 *BytePerPixelDETC = 0;
3404 } else if (SourcePixelFormat == dm_444_8) {
3405 *BytePerPixelDETY = 1;
3406 *BytePerPixelDETC = 0;
3409 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3410 *BytePerPixelDETY = 4;
3411 *BytePerPixelDETC = 1;
3414 } else if (SourcePixelFormat == dm_420_8) {
3415 *BytePerPixelDETY = 1;
3416 *BytePerPixelDETC = 2;
3419 } else if (SourcePixelFormat == dm_420_12) {
3420 *BytePerPixelDETY = 2;
3421 *BytePerPixelDETC = 4;
3425 *BytePerPixelDETY = 4.0 / 3;
3426 *BytePerPixelDETC = 8.0 / 3;
3431 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3432 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3433 if (SurfaceTiling == dm_sw_linear) {
3434 *BlockHeight256BytesY = 1;
3435 } else if (SourcePixelFormat == dm_444_64) {
3436 *BlockHeight256BytesY = 4;
3437 } else if (SourcePixelFormat == dm_444_8) {
3438 *BlockHeight256BytesY = 16;
3440 *BlockHeight256BytesY = 8;
3442 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3443 *BlockHeight256BytesC = 0;
3444 *BlockWidth256BytesC = 0;
3446 if (SurfaceTiling == dm_sw_linear) {
3447 *BlockHeight256BytesY = 1;
3448 *BlockHeight256BytesC = 1;
3449 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3450 *BlockHeight256BytesY = 8;
3451 *BlockHeight256BytesC = 16;
3452 } else if (SourcePixelFormat == dm_420_8) {
3453 *BlockHeight256BytesY = 16;
3454 *BlockHeight256BytesC = 8;
3456 *BlockHeight256BytesY = 8;
3457 *BlockHeight256BytesC = 8;
3459 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3460 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - select the wait time that applies to a prefetch mode.
 *
 * PrefetchMode 0:  max(DRAMClockChangeLatency + UrgentLatency,
 *                      SREnterPlusExitTime, UrgentLatency)
 * PrefetchMode 1:  max(SREnterPlusExitTime, UrgentLatency)
 * any other value: UrgentLatency
 *
 * Return: the selected wait time, in the same unit as the inputs.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	if (PrefetchMode == 0) {
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	} else if (PrefetchMode == 1) {
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	} else {
		return UrgentLatency;
	}
}
3476 double dml314_CalculateWriteBackDISPCLK(
3477 enum source_format_class WritebackPixelFormat,
3479 double WritebackHRatio,
3480 double WritebackVRatio,
3481 unsigned int WritebackHTaps,
3482 unsigned int WritebackVTaps,
3483 long WritebackSourceWidth,
3484 long WritebackDestinationWidth,
3485 unsigned int HTotal,
3486 unsigned int WritebackLineBufferSize)
3488 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3490 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3491 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3492 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3493 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3496 static double CalculateWriteBackDelay(
3497 enum source_format_class WritebackPixelFormat,
3498 double WritebackHRatio,
3499 double WritebackVRatio,
3500 unsigned int WritebackVTaps,
3501 int WritebackDestinationWidth,
3502 int WritebackDestinationHeight,
3503 int WritebackSourceHeight,
3504 unsigned int HTotal)
3506 double CalculateWriteBackDelay;
3508 double Output_lines_last_notclamped;
3509 double WritebackVInit;
3511 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3512 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3513 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3514 if (Output_lines_last_notclamped < 0) {
3515 CalculateWriteBackDelay = 0;
3517 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3519 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - compute the VUPDATE/VREADY
 * pixel offsets and the dynamic-metadata timing terms for one pipe.
 *
 * @MaxInterDCNTileRepeaters: repeater count multiplying the per-repeater delay
 * @DPPCLK, @DISPCLK, @DCFClkDeepSleep, @PixelClock: clocks used as divisors
 *                            or scale factors below
 * @HTotal:                   horizontal total
 * @VBlank:                   vertical blank line count
 * @DynamicMetadataTransmittedBytes: byte count used for *Tdmbf
 * @DynamicMetadataLinesBeforeActiveRequired: when 0, half of the vertical
 *                            blank time is used for *Tdmsks instead
 * @InterlaceEnable, @ProgressiveToInterlaceUnitInOPP: when interlace is on
 *                            and the OPP unit is not used, *Tdmsks is halved
 * @TSetup:            out, (VUpdateOffset + VUpdateWidth + VReadyOffset) / PixelClock
 * @Tdmbf:             out, DynamicMetadataTransmittedBytes / 4 / DISPCLK
 * @Tdmec:             out, HTotal / PixelClock
 * @Tdmsks:            out, see above
 * @VUpdateOffsetPix:  out, ceil(HTotal / 4)
 * @VUpdateWidthPix:   out, in pixels
 * @VReadyOffsetPix:   out, in pixels
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles, so they need %f; only the
	 * update offset is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3566 static void CalculateRowBandwidth(
3568 enum source_format_class SourcePixelFormat,
3570 double VRatioChroma,
3573 unsigned int MetaRowByteLuma,
3574 unsigned int MetaRowByteChroma,
3575 unsigned int meta_row_height_luma,
3576 unsigned int meta_row_height_chroma,
3577 unsigned int PixelPTEBytesPerRowLuma,
3578 unsigned int PixelPTEBytesPerRowChroma,
3579 unsigned int dpte_row_height_luma,
3580 unsigned int dpte_row_height_chroma,
3581 double *meta_row_bw,
3582 double *dpte_row_bw)
3584 if (DCCEnable != true) {
3586 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3587 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3589 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3592 if (GPUVMEnable != true) {
3594 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3596 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3598 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip request schedule for plane k.
 *
 * Writes, for index k of mode_lib->vba:
 *   DestinationLinesToRequestVMInImmediateFlip[k]
 *   DestinationLinesToRequestRowInImmediateFlip[k]
 *   final_flip_bw[k]
 *   ImmediateFlipSupportedForPipe[k]
 *
 * Reads BandwidthAvailableForImmediateFlip and TotImmediateFlipBytes, which
 * must already be set in mode_lib->vba.
 *
 * NOTE(review): k is used as the plane index throughout, but the line that
 * declares it as a parameter is absent from the text below; the braces,
 * "else" lines and the final argument of both dml_max3 calls are absent too.
 * Restore them from the reference source before building.
 */
3602 static void CalculateFlipSchedule(
3603 struct display_mode_lib *mode_lib,
3605 double HostVMInefficiencyFactor,
3606 double UrgentExtraLatency,
3607 double UrgentLatency,
3608 double PDEAndMetaPTEBytesPerFrame,
3609 double MetaRowBytes,
3610 double DPTEBytesPerRow)
3612 struct vba_vars_st *v = &mode_lib->vba;
3613 double min_row_time = 0.0;
3614 unsigned int HostVMDynamicLevelsTrips;
3615 double TimeForFetchingMetaPTEImmediateFlip;
3616 double TimeForFetchingRowInVBlankImmediateFlip;
// Stays at 1.0 when neither GPUVM nor DCC is enabled for this plane.
3617 double ImmediateFlipBW = 1.0;
3618 double LineTime = v->HTotal[k] / v->PixelClock[k];
// Host VM page-table trips only count when both GPUVM and HostVM are on.
3620 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3621 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3623 HostVMDynamicLevelsTrips = 0;
// This plane's share of the available flip bandwidth, proportional to its
// bytes over the total flip bytes.
// NOTE(review): no guard against TotImmediateFlipBytes == 0 is visible.
3626 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3627 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
// Time to fetch the PDE/meta-PTE data; 0 without GPUVM.
3630 if (v->GPUVMEnable == true) {
3631 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3632 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3633 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3636 TimeForFetchingMetaPTEImmediateFlip = 0;
// Fetch time expressed in lines; the 4.0 * ... / 4.0 form presumably rounds
// up to a quarter line - confirm against dml_ceil.
3639 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Time to fetch one meta row plus one DPTE row; 0 without GPUVM and DCC.
3640 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3641 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3642 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3643 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3646 TimeForFetchingRowInVBlankImmediateFlip = 0;
3649 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Flip bandwidth: with GPUVM, the larger of the VM and the row request rates.
3651 if (v->GPUVMEnable == true) {
3652 v->final_flip_bw[k] = dml_max(
3653 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3654 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
// GPUVMEnable is already known to be false here, so this arm effectively
// tests DCCEnable[k] alone.
3655 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3656 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3658 v->final_flip_bw[k] = 0;
// min_row_time: the shortest row time among the row kinds in use (DPTE rows
// with GPUVM only, meta rows with DCC only, both otherwise); the listed
// formats also take their chroma rows into account.
// NOTE(review): dm_420_12 is not in this format list although
// CalculateRowBandwidth includes it - confirm this is intended.
3661 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3662 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3663 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3664 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3665 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3667 min_row_time = dml_min4(
3668 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3669 v->meta_row_height[k] * LineTime / v->VRatio[k],
3670 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3671 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3674 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3675 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3676 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3677 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3679 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
// Flip is rejected for this pipe when either line count reaches its limit
// (32 for VM, 16 for rows) or the combined fetch time exceeds min_row_time.
3683 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3684 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3685 v->ImmediateFlipSupportedForPipe[k] = false;
3687 v->ImmediateFlipSupportedForPipe[k] = true;
3690 #ifdef __DML_VBA_DEBUG__
3691 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3692 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3693 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3694 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3695 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3696 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
/*
 * TruncToValidBPP - pick or validate an output bits-per-pixel value against
 * the link capacity and the DSC / non-DSC limits of the output format.
 *
 * The visible code derives MaxDSCBPP per output format, computes MaxLinkBPP
 * from the link rate, lane count and pixel clock, and then branches on
 * DesiredBPP == 0 versus a caller-supplied DesiredBPP.
 *
 * NOTE(review): most parameters, all local declarations, the NonDSCBPPx /
 * MinDSCBPP assignments and nearly every return statement are absent from
 * the text below. Restore them from the reference source before building.
 */
3701 static double TruncToValidBPP(
3709 enum output_encoder_class Output,
3710 enum output_format_class Format,
3711 unsigned int DSCInputBitPerComponent,
3715 enum odm_combine_mode ODMCombine)
3724 if (Format == dm_420) {
// NOTE(review): "1 / 16" is an integer division and evaluates to 0, so
// nothing is subtracted here, whereas the other formats subtract 1.0 / 16.
// Confirm whether the 4:2:0 limit is meant to differ.
3729 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3730 } else if (Format == dm_444) {
3735 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3742 if (Format == dm_n422) {
3744 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3747 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link capacity in bits per pixel; the DSC-over-DP case applies an extra
// (1 - 2.4 / 100) factor.
3751 if (DSCEnable && Output == dm_dp) {
3752 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3754 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM combine thresholds: 16 for 4:1 and 32 for 2:1 (the statements executed
// when these match are not visible here).
3757 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3759 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
// DesiredBPP == 0: choose a value from MaxLinkBPP. The visible DSC result is
// MaxLinkBPP floored to a multiple of 1/16.
3763 if (DesiredBPP == 0) {
3765 if (MaxLinkBPP < MinDSCBPP) {
3767 } else if (MaxLinkBPP >= MaxDSCBPP) {
3770 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3773 if (MaxLinkBPP >= NonDSCBPP2) {
3775 } else if (MaxLinkBPP >= NonDSCBPP1) {
3777 } else if (MaxLinkBPP >= NonDSCBPP0) {
// Caller-supplied DesiredBPP: this condition is true when the value is
// neither an accepted non-DSC value nor inside [MinDSCBPP, MaxDSCBPP].
3784 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3785 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * CalculatePrefetchSchedulePerPlane - gather the per-plane pipe settings for
 * plane k in state [i][j] into myPipe, call CalculatePrefetchSchedule and
 * store its result in v->NoTimeForPrefetch[i][j][k].
 *
 * NOTE(review): i, j and k are used as indices, but the parameter lines that
 * declare them, the declaration of myPipe and a number of call arguments are
 * absent from the text below. Restore them from the reference source before
 * building.
 */
3794 static noinline void CalculatePrefetchSchedulePerPlane(
3795 struct display_mode_lib *mode_lib,
3796 double HostVMInefficiencyFactor,
3801 struct vba_vars_st *v = &mode_lib->vba;
// Clocks and pipe count come from the per-state tables; the remaining
// fields are per-plane values indexed by k only.
3804 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3805 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3806 myPipe.PixelClock = v->PixelClock[k];
3807 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3808 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3809 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3810 myPipe.VRatio = mode_lib->vba.VRatio[k];
3811 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3813 myPipe.SourceScan = v->SourceScan[k];
3814 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3815 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3816 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3817 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3818 myPipe.InterlaceEnable = v->Interlace[k];
3819 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3820 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3821 myPipe.HTotal = v->HTotal[k];
3822 myPipe.DCCEnable = v->DCCEnable[k];
// True for either ODM combine mode (4:1 or 2:1).
3823 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3824 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3825 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3826 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3827 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3828 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
// The call's return value is recorded per state and plane; arguments passed
// by address (&v->...) are written by the callee.
3829 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3831 HostVMInefficiencyFactor,
3833 v->DSCDelayPerState[i][k],
3834 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3836 v->DPPCLKDelaySCLLBOnly,
3837 v->DPPCLKDelayCNVCCursor,
3838 v->DISPCLKDelaySubtotal,
3839 v->SwathWidthYThisState[k] / v->HRatio[k],
3841 v->MaxInterDCNTileRepeaters,
// The first VStartup argument is capped by v->MaxVStartup; the second is the
// uncapped per-state maximum.
3842 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3843 v->MaximumVStartup[i][j][k],
3844 v->GPUVMMaxPageTableLevels,
3847 v->HostVMMaxNonCachedPageTableLevels,
3848 v->HostVMMinPageSize,
3849 v->DynamicMetadataEnable[k],
3850 v->DynamicMetadataVMEnabled,
3851 v->DynamicMetadataLinesBeforeActiveRequired[k],
3852 v->DynamicMetadataTransmittedBytes[k],
3856 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3857 v->MetaRowBytes[i][j][k],
3858 v->DPTEBytesPerRow[i][j][k],
3859 v->PrefetchLinesY[i][j][k],
3860 v->SwathWidthYThisState[k],
3863 v->PrefetchLinesC[i][j][k],
3864 v->SwathWidthCThisState[k],
3867 v->swath_width_luma_ub_this_state[k],
3868 v->swath_width_chroma_ub_this_state[k],
3869 v->SwathHeightYThisState[k],
3870 v->SwathHeightCThisState[k],
3872 &v->DSTXAfterScaler[k],
3873 &v->DSTYAfterScaler[k],
3874 &v->LineTimesForPrefetch[k],
3876 &v->LinesForMetaPTE[k],
3877 &v->LinesForMetaAndDPTERow[k],
3878 &v->VRatioPreY[i][j][k],
3879 &v->VRatioPreC[i][j][k],
3880 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3881 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3882 &v->NoTimeForDynamicMetadata[i][j][k],
3884 &v->prefetch_vmrow_bw[k],
3888 &v->VUpdateOffsetPix[k],
3889 &v->VUpdateWidthPix[k],
3890 &v->VReadyOffsetPix[k]);
3893 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3895 struct vba_vars_st *v = &mode_lib->vba;
3899 int ReorderingBytes;
3900 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3901 bool NoChroma = true;
3902 bool EnoughWritebackUnits = true;
3903 bool P2IWith420 = false;
3904 bool DSCOnlyIfNecessaryWithBPP = false;
3905 bool DSC422NativeNotSupported = false;
3906 double MaxTotalVActiveRDBandwidth;
3907 bool ViewportExceedsSurface = false;
3908 bool FMTBufferExceeded = false;
3910 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3912 CalculateMinAndMaxPrefetchMode(
3913 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3914 &MinPrefetchMode, &MaxPrefetchMode);
3916 /*Scale Ratio, taps Support Check*/
3918 v->ScaleRatioAndTapsSupport = true;
3919 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3920 if (v->ScalerEnabled[k] == false
3921 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3922 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3923 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3924 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3925 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3926 v->ScaleRatioAndTapsSupport = false;
3927 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3928 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3929 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3930 || v->VRatio[k] > v->vtaps[k]
3931 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3932 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3933 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3934 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3935 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3936 || v->HRatioChroma[k] > v->MaxHSCLRatio
3937 || v->VRatioChroma[k] > v->MaxVSCLRatio
3938 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3939 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3940 v->ScaleRatioAndTapsSupport = false;
3943 /*Source Format, Pixel Format and Scan Support Check*/
3945 v->SourceFormatPixelAndScanSupport = true;
3946 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3947 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3948 v->SourceFormatPixelAndScanSupport = false;
3951 /*Bandwidth Support Check*/
3953 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3954 CalculateBytePerPixelAnd256BBlockSizes(
3955 v->SourcePixelFormat[k],
3956 v->SurfaceTiling[k],
3957 &v->BytePerPixelY[k],
3958 &v->BytePerPixelC[k],
3959 &v->BytePerPixelInDETY[k],
3960 &v->BytePerPixelInDETC[k],
3961 &v->Read256BlockHeightY[k],
3962 &v->Read256BlockHeightC[k],
3963 &v->Read256BlockWidthY[k],
3964 &v->Read256BlockWidthC[k]);
3966 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3967 if (v->SourceScan[k] != dm_vert) {
3968 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3969 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3971 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3972 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3975 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3976 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3977 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3978 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3979 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3981 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3982 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3983 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3984 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3985 } else if (v->WritebackEnable[k] == true) {
3986 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3987 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3989 v->WriteBandwidth[k] = 0.0;
3993 /*Writeback Latency support check*/
3995 v->WritebackLatencySupport = true;
3996 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3997 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3998 v->WritebackLatencySupport = false;
4002 /*Writeback Mode Support Check*/
4004 v->TotalNumberOfActiveWriteback = 0;
4005 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4006 if (v->WritebackEnable[k] == true) {
4007 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4011 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4012 EnoughWritebackUnits = false;
4015 /*Writeback Scale Ratio and Taps Support Check*/
4017 v->WritebackScaleRatioAndTapsSupport = true;
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 if (v->WritebackEnable[k] == true) {
4020 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4021 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4022 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4023 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4024 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4025 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4026 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4027 v->WritebackScaleRatioAndTapsSupport = false;
4029 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4030 v->WritebackScaleRatioAndTapsSupport = false;
4034 /*Maximum DISPCLK/DPPCLK Support check*/
4036 v->WritebackRequiredDISPCLK = 0.0;
4037 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4038 if (v->WritebackEnable[k] == true) {
4039 v->WritebackRequiredDISPCLK = dml_max(
4040 v->WritebackRequiredDISPCLK,
4041 dml314_CalculateWriteBackDISPCLK(
4042 v->WritebackPixelFormat[k],
4044 v->WritebackHRatio[k],
4045 v->WritebackVRatio[k],
4046 v->WritebackHTaps[k],
4047 v->WritebackVTaps[k],
4048 v->WritebackSourceWidth[k],
4049 v->WritebackDestinationWidth[k],
4051 v->WritebackLineBufferSize));
4054 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4055 if (v->HRatio[k] > 1.0) {
4056 v->PSCL_FACTOR[k] = dml_min(
4057 v->MaxDCHUBToPSCLThroughput,
4058 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4060 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4062 if (v->BytePerPixelC[k] == 0.0) {
4063 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4064 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4066 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4067 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4069 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4070 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4073 if (v->HRatioChroma[k] > 1.0) {
4074 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4075 v->MaxDCHUBToPSCLThroughput,
4076 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4078 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4080 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4082 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4083 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4084 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4085 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4087 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4088 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4089 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4093 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4094 int MaximumSwathWidthSupportLuma;
4095 int MaximumSwathWidthSupportChroma;
4097 if (v->SurfaceTiling[k] == dm_sw_linear) {
4098 MaximumSwathWidthSupportLuma = 8192.0;
4099 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4100 MaximumSwathWidthSupportLuma = 2880.0;
4101 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4102 MaximumSwathWidthSupportLuma = 3840.0;
4104 MaximumSwathWidthSupportLuma = 5760.0;
4107 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4108 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4110 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4112 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4113 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4114 if (v->BytePerPixelC[k] == 0.0) {
4115 v->MaximumSwathWidthInLineBufferChroma = 0;
4117 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4118 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4120 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4121 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4124 CalculateSwathAndDETConfiguration(
4126 v->NumberOfActivePlanes,
4127 v->DETBufferSizeInKByte[0],
4128 v->MaximumSwathWidthLuma,
4129 v->MaximumSwathWidthChroma,
4131 v->SourcePixelFormat,
4139 v->Read256BlockHeightY,
4140 v->Read256BlockHeightC,
4141 v->Read256BlockWidthY,
4142 v->Read256BlockWidthC,
4143 v->odm_combine_dummy,
4144 v->BlendingAndTiming,
4147 v->BytePerPixelInDETY,
4148 v->BytePerPixelInDETC,
4152 v->NoOfDPPThisState,
4153 v->swath_width_luma_ub_this_state,
4154 v->swath_width_chroma_ub_this_state,
4155 v->SwathWidthYThisState,
4156 v->SwathWidthCThisState,
4157 v->SwathHeightYThisState,
4158 v->SwathHeightCThisState,
4159 v->DETBufferSizeYThisState,
4160 v->DETBufferSizeCThisState,
4161 v->SingleDPPViewportSizeSupportPerPlane,
4162 &v->ViewportSizeSupport[0][0]);
4164 for (i = 0; i < v->soc.num_states; i++) {
4165 for (j = 0; j < 2; j++) {
4166 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4167 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4168 v->RequiredDISPCLK[i][j] = 0.0;
4169 v->DISPCLK_DPPCLK_Support[i][j] = true;
4170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4171 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4172 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4173 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4174 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4175 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4176 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4177 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4179 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4180 * (1 + v->DISPCLKRampingMargin / 100.0);
4181 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4182 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4183 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4184 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4185 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4187 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4188 * (1 + v->DISPCLKRampingMargin / 100.0);
4189 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4190 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4191 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4192 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4193 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4196 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4197 || !(v->Output[k] == dm_dp ||
4198 v->Output[k] == dm_dp2p0 ||
4199 v->Output[k] == dm_edp)) {
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4203 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4204 FMTBufferExceeded = true;
4205 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4208 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4209 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4212 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4213 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4214 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4216 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4217 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4219 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4220 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4221 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4225 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4226 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4229 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4230 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4231 if (v->Output[k] == dm_hdmi) {
4232 FMTBufferExceeded = true;
4233 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4234 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4235 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4237 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4238 FMTBufferExceeded = true;
4240 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4241 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4244 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4245 v->MPCCombine[i][j][k] = false;
4246 v->NoOfDPP[i][j][k] = 4;
4247 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4248 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4249 v->MPCCombine[i][j][k] = false;
4250 v->NoOfDPP[i][j][k] = 2;
4251 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4252 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4253 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4254 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4255 v->MPCCombine[i][j][k] = false;
4256 v->NoOfDPP[i][j][k] = 1;
4257 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4259 v->MPCCombine[i][j][k] = true;
4260 v->NoOfDPP[i][j][k] = 2;
4261 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4263 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4264 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4265 > v->MaxDppclkRoundedDownToDFSGranularity)
4266 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4267 v->DISPCLK_DPPCLK_Support[i][j] = false;
4270 v->TotalNumberOfActiveDPP[i][j] = 0;
4271 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4272 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4273 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4274 if (v->NoOfDPP[i][j][k] == 1)
4275 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4276 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4277 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4282 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4283 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4284 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4285 double BWOfNonSplitPlaneOfMaximumBandwidth;
4286 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4288 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4289 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4290 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4291 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4292 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4293 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4294 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4297 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4298 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4299 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4300 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4301 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4302 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4303 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4306 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4307 v->RequiredDISPCLK[i][j] = 0.0;
4308 v->DISPCLK_DPPCLK_Support[i][j] = true;
4309 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4310 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4311 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4312 v->MPCCombine[i][j][k] = true;
4313 v->NoOfDPP[i][j][k] = 2;
4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4315 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4320 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4322 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4323 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4325 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4327 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4329 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4330 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4331 > v->MaxDppclkRoundedDownToDFSGranularity)
4332 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4333 v->DISPCLK_DPPCLK_Support[i][j] = false;
4336 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4337 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4338 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4341 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4342 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4343 v->DISPCLK_DPPCLK_Support[i][j] = false;
4348 /*Total Available Pipes Support Check*/
4350 for (i = 0; i < v->soc.num_states; i++) {
4351 for (j = 0; j < 2; j++) {
4352 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4353 v->TotalAvailablePipesSupport[i][j] = true;
4355 v->TotalAvailablePipesSupport[i][j] = false;
4359 /*Display IO and DSC Support Check*/
4361 v->NonsupportedDSCInputBPC = false;
4362 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4363 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4364 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4365 v->NonsupportedDSCInputBPC = true;
4369 /*Number Of DSC Slices*/
4370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4371 if (v->BlendingAndTiming[k] == k) {
4372 if (v->PixelClockBackEnd[k] > 3200) {
4373 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4374 } else if (v->PixelClockBackEnd[k] > 1360) {
4375 v->NumberOfDSCSlices[k] = 8;
4376 } else if (v->PixelClockBackEnd[k] > 680) {
4377 v->NumberOfDSCSlices[k] = 4;
4378 } else if (v->PixelClockBackEnd[k] > 340) {
4379 v->NumberOfDSCSlices[k] = 2;
4381 v->NumberOfDSCSlices[k] = 1;
4384 v->NumberOfDSCSlices[k] = 0;
4388 for (i = 0; i < v->soc.num_states; i++) {
4389 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4390 v->RequiresDSC[i][k] = false;
4391 v->RequiresFEC[i][k] = false;
4392 if (v->BlendingAndTiming[k] == k) {
4393 if (v->Output[k] == dm_hdmi) {
4394 v->RequiresDSC[i][k] = false;
4395 v->RequiresFEC[i][k] = false;
4396 v->OutputBppPerState[i][k] = TruncToValidBPP(
4397 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4401 v->PixelClockBackEnd[k],
4402 v->ForcedOutputLinkBPP[k],
4406 v->DSCInputBitPerComponent[k],
4407 v->NumberOfDSCSlices[k],
4408 v->AudioSampleRate[k],
4409 v->AudioSampleLayout[k],
4410 v->ODMCombineEnablePerState[i][k]);
4411 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4412 if (v->DSCEnable[k] == true) {
4413 v->RequiresDSC[i][k] = true;
4414 v->LinkDSCEnable = true;
4415 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4416 v->RequiresFEC[i][k] = true;
4418 v->RequiresFEC[i][k] = false;
4421 v->RequiresDSC[i][k] = false;
4422 v->LinkDSCEnable = false;
4423 if (v->Output[k] == dm_dp2p0) {
4424 v->RequiresFEC[i][k] = true;
4426 v->RequiresFEC[i][k] = false;
4429 if (v->Output[k] == dm_dp2p0) {
4430 v->Outbpp = BPP_INVALID;
4431 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4432 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4433 v->Outbpp = TruncToValidBPP(
4434 (1.0 - v->Downspreading / 100.0) * 10000,
4435 v->OutputLinkDPLanes[k],
4438 v->PixelClockBackEnd[k],
4439 v->ForcedOutputLinkBPP[k],
4443 v->DSCInputBitPerComponent[k],
4444 v->NumberOfDSCSlices[k],
4445 v->AudioSampleRate[k],
4446 v->AudioSampleLayout[k],
4447 v->ODMCombineEnablePerState[i][k]);
4448 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4449 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4450 v->RequiresDSC[i][k] = true;
4451 v->LinkDSCEnable = true;
4452 v->Outbpp = TruncToValidBPP(
4453 (1.0 - v->Downspreading / 100.0) * 10000,
4454 v->OutputLinkDPLanes[k],
4457 v->PixelClockBackEnd[k],
4458 v->ForcedOutputLinkBPP[k],
4462 v->DSCInputBitPerComponent[k],
4463 v->NumberOfDSCSlices[k],
4464 v->AudioSampleRate[k],
4465 v->AudioSampleLayout[k],
4466 v->ODMCombineEnablePerState[i][k]);
4468 v->OutputBppPerState[i][k] = v->Outbpp;
4469 // TODO: Need some other way to handle this nonsense
4470 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4472 if (v->Outbpp == BPP_INVALID &&
4473 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4474 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4475 v->Outbpp = TruncToValidBPP(
4476 (1.0 - v->Downspreading / 100.0) * 13500,
4477 v->OutputLinkDPLanes[k],
4480 v->PixelClockBackEnd[k],
4481 v->ForcedOutputLinkBPP[k],
4485 v->DSCInputBitPerComponent[k],
4486 v->NumberOfDSCSlices[k],
4487 v->AudioSampleRate[k],
4488 v->AudioSampleLayout[k],
4489 v->ODMCombineEnablePerState[i][k]);
4490 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4491 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4492 v->RequiresDSC[i][k] = true;
4493 v->LinkDSCEnable = true;
4494 v->Outbpp = TruncToValidBPP(
4495 (1.0 - v->Downspreading / 100.0) * 13500,
4496 v->OutputLinkDPLanes[k],
4499 v->PixelClockBackEnd[k],
4500 v->ForcedOutputLinkBPP[k],
4504 v->DSCInputBitPerComponent[k],
4505 v->NumberOfDSCSlices[k],
4506 v->AudioSampleRate[k],
4507 v->AudioSampleLayout[k],
4508 v->ODMCombineEnablePerState[i][k]);
4510 v->OutputBppPerState[i][k] = v->Outbpp;
4511 // TODO: Need some other way to handle this nonsense
4512 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4514 if (v->Outbpp == BPP_INVALID &&
4515 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4516 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4517 v->Outbpp = TruncToValidBPP(
4518 (1.0 - v->Downspreading / 100.0) * 20000,
4519 v->OutputLinkDPLanes[k],
4522 v->PixelClockBackEnd[k],
4523 v->ForcedOutputLinkBPP[k],
4527 v->DSCInputBitPerComponent[k],
4528 v->NumberOfDSCSlices[k],
4529 v->AudioSampleRate[k],
4530 v->AudioSampleLayout[k],
4531 v->ODMCombineEnablePerState[i][k]);
4532 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4533 v->ForcedOutputLinkBPP[k] == 0) {
4534 v->RequiresDSC[i][k] = true;
4535 v->LinkDSCEnable = true;
4536 v->Outbpp = TruncToValidBPP(
4537 (1.0 - v->Downspreading / 100.0) * 20000,
4538 v->OutputLinkDPLanes[k],
4541 v->PixelClockBackEnd[k],
4542 v->ForcedOutputLinkBPP[k],
4546 v->DSCInputBitPerComponent[k],
4547 v->NumberOfDSCSlices[k],
4548 v->AudioSampleRate[k],
4549 v->AudioSampleLayout[k],
4550 v->ODMCombineEnablePerState[i][k]);
4552 v->OutputBppPerState[i][k] = v->Outbpp;
4553 // TODO: Need some other way to handle this nonsense
4554 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4557 v->Outbpp = BPP_INVALID;
4558 if (v->PHYCLKPerState[i] >= 270.0) {
4559 v->Outbpp = TruncToValidBPP(
4560 (1.0 - v->Downspreading / 100.0) * 2700,
4561 v->OutputLinkDPLanes[k],
4564 v->PixelClockBackEnd[k],
4565 v->ForcedOutputLinkBPP[k],
4569 v->DSCInputBitPerComponent[k],
4570 v->NumberOfDSCSlices[k],
4571 v->AudioSampleRate[k],
4572 v->AudioSampleLayout[k],
4573 v->ODMCombineEnablePerState[i][k]);
4574 v->OutputBppPerState[i][k] = v->Outbpp;
4575 // TODO: Need some other way to handle this nonsense
4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4579 v->Outbpp = TruncToValidBPP(
4580 (1.0 - v->Downspreading / 100.0) * 5400,
4581 v->OutputLinkDPLanes[k],
4584 v->PixelClockBackEnd[k],
4585 v->ForcedOutputLinkBPP[k],
4589 v->DSCInputBitPerComponent[k],
4590 v->NumberOfDSCSlices[k],
4591 v->AudioSampleRate[k],
4592 v->AudioSampleLayout[k],
4593 v->ODMCombineEnablePerState[i][k]);
4594 v->OutputBppPerState[i][k] = v->Outbpp;
4595 // TODO: Need some other way to handle this nonsense
4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4599 v->Outbpp = TruncToValidBPP(
4600 (1.0 - v->Downspreading / 100.0) * 8100,
4601 v->OutputLinkDPLanes[k],
4604 v->PixelClockBackEnd[k],
4605 v->ForcedOutputLinkBPP[k],
4609 v->DSCInputBitPerComponent[k],
4610 v->NumberOfDSCSlices[k],
4611 v->AudioSampleRate[k],
4612 v->AudioSampleLayout[k],
4613 v->ODMCombineEnablePerState[i][k]);
4614 v->OutputBppPerState[i][k] = v->Outbpp;
4615 // TODO: Need some other way to handle this nonsense
4616 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4621 v->OutputBppPerState[i][k] = 0;
4626 for (i = 0; i < v->soc.num_states; i++) {
4627 v->LinkCapacitySupport[i] = true;
4628 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4629 if (v->BlendingAndTiming[k] == k
4630 && (v->Output[k] == dm_dp ||
4631 v->Output[k] == dm_edp ||
4632 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4633 v->LinkCapacitySupport[i] = false;
4639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4640 if (v->BlendingAndTiming[k] == k
4641 && (v->Output[k] == dm_dp ||
4642 v->Output[k] == dm_edp ||
4643 v->Output[k] == dm_hdmi)) {
4644 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4647 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4648 && !v->DSC422NativeSupport) {
4649 DSC422NativeNotSupported = true;
4655 for (i = 0; i < v->soc.num_states; ++i) {
4656 v->ODMCombine4To1SupportCheckOK[i] = true;
4657 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4658 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4659 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4660 || v->Output[k] == dm_hdmi)) {
4661 v->ODMCombine4To1SupportCheckOK[i] = false;
4666 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4668 for (i = 0; i < v->soc.num_states; i++) {
4669 v->NotEnoughDSCUnits[i] = false;
4670 v->TotalDSCUnitsRequired = 0.0;
4671 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4672 if (v->RequiresDSC[i][k] == true) {
4673 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4674 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4675 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4676 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4678 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4682 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4683 v->NotEnoughDSCUnits[i] = true;
4686 /*DSC Delay per state*/
4688 for (i = 0; i < v->soc.num_states; i++) {
4689 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4690 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4693 v->BPP = v->OutputBppPerState[i][k];
4695 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4696 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4697 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4698 v->DSCInputBitPerComponent[k],
4700 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4701 v->NumberOfDSCSlices[k],
4703 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4704 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4705 v->DSCDelayPerState[i][k] = 2.0
4706 * (dscceComputeDelay(
4707 v->DSCInputBitPerComponent[k],
4709 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4710 v->NumberOfDSCSlices[k] / 2,
4712 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4714 v->DSCDelayPerState[i][k] = 4.0
4715 * (dscceComputeDelay(
4716 v->DSCInputBitPerComponent[k],
4718 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4719 v->NumberOfDSCSlices[k] / 4,
4721 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4723 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
4724 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4726 v->DSCDelayPerState[i][k] = 0.0;
4729 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4730 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4731 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4732 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4738 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4740 for (i = 0; i < v->soc.num_states; ++i) {
4741 for (j = 0; j <= 1; ++j) {
4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4743 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4744 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4745 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4748 CalculateSwathAndDETConfiguration(
4750 v->NumberOfActivePlanes,
4751 v->DETBufferSizeInKByte[0],
4752 v->MaximumSwathWidthLuma,
4753 v->MaximumSwathWidthChroma,
4755 v->SourcePixelFormat,
4763 v->Read256BlockHeightY,
4764 v->Read256BlockHeightC,
4765 v->Read256BlockWidthY,
4766 v->Read256BlockWidthC,
4767 v->ODMCombineEnableThisState,
4768 v->BlendingAndTiming,
4771 v->BytePerPixelInDETY,
4772 v->BytePerPixelInDETC,
4776 v->NoOfDPPThisState,
4777 v->swath_width_luma_ub_this_state,
4778 v->swath_width_chroma_ub_this_state,
4779 v->SwathWidthYThisState,
4780 v->SwathWidthCThisState,
4781 v->SwathHeightYThisState,
4782 v->SwathHeightCThisState,
4783 v->DETBufferSizeYThisState,
4784 v->DETBufferSizeCThisState,
4786 &v->ViewportSizeSupport[i][j]);
4788 CalculateDCFCLKDeepSleep(
4790 v->NumberOfActivePlanes,
4795 v->SwathWidthYThisState,
4796 v->SwathWidthCThisState,
4797 v->NoOfDPPThisState,
4802 v->PSCL_FACTOR_CHROMA,
4803 v->RequiredDPPCLKThisState,
4804 v->ReadBandwidthLuma,
4805 v->ReadBandwidthChroma,
4807 &v->ProjectedDCFCLKDeepSleep[i][j]);
4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4810 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4811 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4812 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4813 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4814 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4815 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4816 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4817 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4822 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4823 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4824 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4827 for (i = 0; i < v->soc.num_states; i++) {
4828 for (j = 0; j < 2; j++) {
4829 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4831 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4832 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4833 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4834 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4835 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4836 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4837 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4838 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4839 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4842 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4843 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4844 if (v->DCCEnable[k] == true) {
4845 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4849 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4850 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4851 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4853 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4854 && v->SourceScan[k] != dm_vert) {
4855 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4857 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4859 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4860 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4863 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4866 v->Read256BlockHeightC[k],
4867 v->Read256BlockWidthC[k],
4868 v->SourcePixelFormat[k],
4869 v->SurfaceTiling[k],
4870 v->BytePerPixelC[k],
4872 v->SwathWidthCThisState[k],
4873 v->ViewportHeightChroma[k],
4876 v->HostVMMaxNonCachedPageTableLevels,
4877 v->GPUVMMinPageSize,
4878 v->HostVMMinPageSize,
4879 v->PTEBufferSizeInRequestsForChroma,
4882 &v->MacroTileWidthC[k],
4884 &v->DPTEBytesPerRowC,
4885 &v->PTEBufferSizeNotExceededC[i][j][k],
4887 &v->dpte_row_height_chroma[k],
4891 &v->meta_row_height_chroma[k],
4898 &v->dummyinteger11);
4900 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4905 v->ProgressiveToInterlaceUnitInOPP,
4906 v->SwathHeightCThisState[k],
4907 v->ViewportYStartC[k],
4911 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4912 v->PTEBufferSizeInRequestsForChroma = 0;
4913 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4914 v->MetaRowBytesC = 0.0;
4915 v->DPTEBytesPerRowC = 0.0;
4916 v->PrefetchLinesC[i][j][k] = 0.0;
4917 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4919 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4922 v->Read256BlockHeightY[k],
4923 v->Read256BlockWidthY[k],
4924 v->SourcePixelFormat[k],
4925 v->SurfaceTiling[k],
4926 v->BytePerPixelY[k],
4928 v->SwathWidthYThisState[k],
4929 v->ViewportHeight[k],
4932 v->HostVMMaxNonCachedPageTableLevels,
4933 v->GPUVMMinPageSize,
4934 v->HostVMMinPageSize,
4935 v->PTEBufferSizeInRequestsForLuma,
4937 v->DCCMetaPitchY[k],
4938 &v->MacroTileWidthY[k],
4940 &v->DPTEBytesPerRowY,
4941 &v->PTEBufferSizeNotExceededY[i][j][k],
4943 &v->dpte_row_height[k],
4947 &v->meta_row_height[k],
4949 &v->dpte_group_bytes[k],
4955 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4960 v->ProgressiveToInterlaceUnitInOPP,
4961 v->SwathHeightYThisState[k],
4962 v->ViewportYStartY[k],
4965 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4966 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4967 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4969 CalculateRowBandwidth(
4971 v->SourcePixelFormat[k],
4975 v->HTotal[k] / v->PixelClock[k],
4978 v->meta_row_height[k],
4979 v->meta_row_height_chroma[k],
4980 v->DPTEBytesPerRowY,
4981 v->DPTEBytesPerRowC,
4982 v->dpte_row_height[k],
4983 v->dpte_row_height_chroma[k],
4984 &v->meta_row_bandwidth[i][j][k],
4985 &v->dpte_row_bandwidth[i][j][k]);
4988 * DCCMetaBufferSizeSupport(i, j) = True
4989 * For k = 0 To NumberOfActivePlanes - 1
4990 * If MetaRowBytes(i, j, k) > 24064 Then
4991 * DCCMetaBufferSizeSupport(i, j) = False
4995 v->DCCMetaBufferSizeSupport[i][j] = true;
4996 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4997 if (v->MetaRowBytes[i][j][k] > 24064)
4998 v->DCCMetaBufferSizeSupport[i][j] = false;
5000 v->UrgLatency[i] = CalculateUrgentLatency(
5001 v->UrgentLatencyPixelDataOnly,
5002 v->UrgentLatencyPixelMixedWithVMData,
5003 v->UrgentLatencyVMDataOnly,
5004 v->DoUrgentLatencyAdjustment,
5005 v->UrgentLatencyAdjustmentFabricClockComponent,
5006 v->UrgentLatencyAdjustmentFabricClockReference,
5007 v->FabricClockPerState[i]);
5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5010 CalculateUrgentBurstFactor(
5011 v->swath_width_luma_ub_this_state[k],
5012 v->swath_width_chroma_ub_this_state[k],
5013 v->SwathHeightYThisState[k],
5014 v->SwathHeightCThisState[k],
5015 v->HTotal[k] / v->PixelClock[k],
5017 v->CursorBufferSize,
5018 v->CursorWidth[k][0],
5022 v->BytePerPixelInDETY[k],
5023 v->BytePerPixelInDETC[k],
5024 v->DETBufferSizeYThisState[k],
5025 v->DETBufferSizeCThisState[k],
5026 &v->UrgentBurstFactorCursor[k],
5027 &v->UrgentBurstFactorLuma[k],
5028 &v->UrgentBurstFactorChroma[k],
5029 &NotUrgentLatencyHiding[k]);
5032 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5033 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5034 if (NotUrgentLatencyHiding[k]) {
5035 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5040 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5041 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5042 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5045 v->TotalVActivePixelBandwidth[i][j] = 0;
5046 v->TotalVActiveCursorBandwidth[i][j] = 0;
5047 v->TotalMetaRowBandwidth[i][j] = 0;
5048 v->TotalDPTERowBandwidth[i][j] = 0;
5049 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5050 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5051 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5052 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5053 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5058 //Calculate Return BW
5059 for (i = 0; i < v->soc.num_states; ++i) {
5060 for (j = 0; j <= 1; ++j) {
5061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5062 if (v->BlendingAndTiming[k] == k) {
5063 if (v->WritebackEnable[k] == true) {
5064 v->WritebackDelayTime[k] = v->WritebackLatency
5065 + CalculateWriteBackDelay(
5066 v->WritebackPixelFormat[k],
5067 v->WritebackHRatio[k],
5068 v->WritebackVRatio[k],
5069 v->WritebackVTaps[k],
5070 v->WritebackDestinationWidth[k],
5071 v->WritebackDestinationHeight[k],
5072 v->WritebackSourceHeight[k],
5073 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5075 v->WritebackDelayTime[k] = 0.0;
5077 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5078 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5079 v->WritebackDelayTime[k] = dml_max(
5080 v->WritebackDelayTime[k],
5082 + CalculateWriteBackDelay(
5083 v->WritebackPixelFormat[m],
5084 v->WritebackHRatio[m],
5085 v->WritebackVRatio[m],
5086 v->WritebackVTaps[m],
5087 v->WritebackDestinationWidth[m],
5088 v->WritebackDestinationHeight[m],
5089 v->WritebackSourceHeight[m],
5090 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5095 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5096 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5097 if (v->BlendingAndTiming[k] == m) {
5098 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5102 v->MaxMaxVStartup[i][j] = 0;
5103 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5104 v->MaximumVStartup[i][j][k] =
5105 CalculateMaxVStartup(
5111 v->ProgressiveToInterlaceUnitInOPP,
5113 v->ip.VBlankNomDefaultUS,
5114 v->WritebackDelayTime[k]);
5115 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5120 ReorderingBytes = v->NumberOfChannels
5122 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5123 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5124 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5126 for (i = 0; i < v->soc.num_states; ++i) {
5127 for (j = 0; j <= 1; ++j) {
5128 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5132 if (v->UseMinimumRequiredDCFCLK == true)
5133 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5135 for (i = 0; i < v->soc.num_states; ++i) {
5136 for (j = 0; j <= 1; ++j) {
5137 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5138 v->ReturnBusWidth * v->DCFCLKState[i][j],
5139 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5140 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5141 double PixelDataOnlyReturnBWPerState = dml_min(
5142 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5143 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5144 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5145 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5146 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5148 if (v->HostVMEnable != true) {
5149 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5151 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5156 //Re-ordering Buffer Support Check
5157 for (i = 0; i < v->soc.num_states; ++i) {
5158 for (j = 0; j <= 1; ++j) {
5159 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5160 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5161 v->ROBSupport[i][j] = true;
5163 v->ROBSupport[i][j] = false;
5168 //Vertical Active BW support check
5170 MaxTotalVActiveRDBandwidth = 0;
5171 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5172 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5175 for (i = 0; i < v->soc.num_states; ++i) {
5176 for (j = 0; j <= 1; ++j) {
5177 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5179 v->ReturnBusWidth * v->DCFCLKState[i][j],
5180 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5181 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5182 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5183 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5185 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5186 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5188 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5193 v->UrgentLatency = CalculateUrgentLatency(
5194 v->UrgentLatencyPixelDataOnly,
5195 v->UrgentLatencyPixelMixedWithVMData,
5196 v->UrgentLatencyVMDataOnly,
5197 v->DoUrgentLatencyAdjustment,
5198 v->UrgentLatencyAdjustmentFabricClockComponent,
5199 v->UrgentLatencyAdjustmentFabricClockReference,
5202 for (i = 0; i < v->soc.num_states; ++i) {
5203 for (j = 0; j <= 1; ++j) {
5204 double VMDataOnlyReturnBWPerState;
5205 double HostVMInefficiencyFactor = 1;
5206 int NextPrefetchModeState = MinPrefetchMode;
5207 bool UnboundedRequestEnabledThisState = false;
5208 int CompressedBufferSizeInkByteThisState = 0;
5211 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5213 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5214 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5215 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5216 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5220 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5221 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5222 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5223 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5224 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5225 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5226 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5227 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5228 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5231 VMDataOnlyReturnBWPerState = dml_min(
5233 v->ReturnBusWidth * v->DCFCLKState[i][j],
5234 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5235 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5236 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5237 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5238 if (v->GPUVMEnable && v->HostVMEnable)
5239 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5241 v->ExtraLatency = CalculateExtraLatency(
5242 v->RoundTripPingLatencyCycles,
5244 v->DCFCLKState[i][j],
5245 v->TotalNumberOfActiveDPP[i][j],
5246 v->PixelChunkSizeInKByte,
5247 v->TotalNumberOfDCCActiveDPP[i][j],
5249 v->ReturnBWPerState[i][j],
5252 v->NumberOfActivePlanes,
5253 v->NoOfDPPThisState,
5254 v->dpte_group_bytes,
5255 HostVMInefficiencyFactor,
5256 v->HostVMMinPageSize,
5257 v->HostVMMaxNonCachedPageTableLevels);
5259 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5261 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5262 v->MaxVStartup = v->NextMaxVStartup;
5264 v->TWait = CalculateTWait(
5265 v->PrefetchModePerState[i][j],
5266 v->DRAMClockChangeLatency,
5268 v->SREnterPlusExitTime);
5270 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5271 CalculatePrefetchSchedulePerPlane(mode_lib,
5272 HostVMInefficiencyFactor,
5276 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5277 CalculateUrgentBurstFactor(
5278 v->swath_width_luma_ub_this_state[k],
5279 v->swath_width_chroma_ub_this_state[k],
5280 v->SwathHeightYThisState[k],
5281 v->SwathHeightCThisState[k],
5282 v->HTotal[k] / v->PixelClock[k],
5284 v->CursorBufferSize,
5285 v->CursorWidth[k][0],
5287 v->VRatioPreY[i][j][k],
5288 v->VRatioPreC[i][j][k],
5289 v->BytePerPixelInDETY[k],
5290 v->BytePerPixelInDETC[k],
5291 v->DETBufferSizeYThisState[k],
5292 v->DETBufferSizeCThisState[k],
5293 &v->UrgentBurstFactorCursorPre[k],
5294 &v->UrgentBurstFactorLumaPre[k],
5295 &v->UrgentBurstFactorChromaPre[k],
5296 &v->NotUrgentLatencyHidingPre[k]);
5299 v->MaximumReadBandwidthWithPrefetch = 0.0;
5300 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5301 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5302 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5304 v->MaximumReadBandwidthWithPrefetch =
5305 v->MaximumReadBandwidthWithPrefetch
5307 v->VActivePixelBandwidth[i][j][k]
5308 + v->VActiveCursorBandwidth[i][j][k]
5309 + v->NoOfDPP[i][j][k]
5310 * (v->meta_row_bandwidth[i][j][k]
5311 + v->dpte_row_bandwidth[i][j][k]),
5312 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5314 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5315 * v->UrgentBurstFactorLumaPre[k]
5316 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5317 * v->UrgentBurstFactorChromaPre[k])
5318 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5321 v->NotEnoughUrgentLatencyHidingPre = false;
5322 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5323 if (v->NotUrgentLatencyHidingPre[k] == true) {
5324 v->NotEnoughUrgentLatencyHidingPre = true;
5328 v->PrefetchSupported[i][j] = true;
5329 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5330 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5331 v->PrefetchSupported[i][j] = false;
5333 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5334 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5335 || v->NoTimeForPrefetch[i][j][k] == true) {
5336 v->PrefetchSupported[i][j] = false;
5340 v->DynamicMetadataSupported[i][j] = true;
5341 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5342 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5343 v->DynamicMetadataSupported[i][j] = false;
5347 v->VRatioInPrefetchSupported[i][j] = true;
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5350 v->VRatioInPrefetchSupported[i][j] = false;
5353 v->AnyLinesForVMOrRowTooLarge = false;
5354 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5355 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5356 v->AnyLinesForVMOrRowTooLarge = true;
5360 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5362 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5363 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5364 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5365 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5367 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5369 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5370 * v->UrgentBurstFactorLumaPre[k]
5371 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5372 * v->UrgentBurstFactorChromaPre[k])
5373 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5375 v->TotImmediateFlipBytes = 0.0;
5376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5377 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5378 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5379 + v->DPTEBytesPerRow[i][j][k]);
5382 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5383 CalculateFlipSchedule(
5386 HostVMInefficiencyFactor,
5389 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5390 v->MetaRowBytes[i][j][k],
5391 v->DPTEBytesPerRow[i][j][k]);
5393 v->total_dcn_read_bw_with_flip = 0.0;
5394 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5395 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5397 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5398 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5399 + v->VActiveCursorBandwidth[i][j][k],
5401 * (v->final_flip_bw[k]
5402 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5403 * v->UrgentBurstFactorLumaPre[k]
5404 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5405 * v->UrgentBurstFactorChromaPre[k])
5406 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5408 v->ImmediateFlipSupportedForState[i][j] = true;
5409 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5410 v->ImmediateFlipSupportedForState[i][j] = false;
5412 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5413 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5414 v->ImmediateFlipSupportedForState[i][j] = false;
5418 v->ImmediateFlipSupportedForState[i][j] = false;
5421 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5422 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5423 NextPrefetchModeState = NextPrefetchModeState + 1;
5425 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5427 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5428 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5429 && ((v->HostVMEnable == false &&
5430 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5431 || v->ImmediateFlipSupportedForState[i][j] == true))
5432 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5434 CalculateUnboundedRequestAndCompressedBufferSize(
5435 v->DETBufferSizeInKByte[0],
5436 v->ConfigReturnBufferSizeInKByte,
5437 v->UseUnboundedRequesting,
5438 v->TotalNumberOfActiveDPP[i][j],
5441 v->CompressedBufferSegmentSizeInkByte,
5443 &UnboundedRequestEnabledThisState,
5444 &CompressedBufferSizeInkByteThisState);
5446 CalculateWatermarksAndDRAMSpeedChangeSupport(
5448 v->PrefetchModePerState[i][j],
5449 v->DCFCLKState[i][j],
5450 v->ReturnBWPerState[i][j],
5453 v->SOCCLKPerState[i],
5454 v->ProjectedDCFCLKDeepSleep[i][j],
5455 v->DETBufferSizeYThisState,
5456 v->DETBufferSizeCThisState,
5457 v->SwathHeightYThisState,
5458 v->SwathHeightCThisState,
5459 v->SwathWidthYThisState,
5460 v->SwathWidthCThisState,
5461 v->NoOfDPPThisState,
5462 v->BytePerPixelInDETY,
5463 v->BytePerPixelInDETC,
5464 UnboundedRequestEnabledThisState,
5465 CompressedBufferSizeInkByteThisState,
5466 &v->DRAMClockChangeSupport[i][j],
5474 /*PTE Buffer Size Check*/
5475 for (i = 0; i < v->soc.num_states; i++) {
5476 for (j = 0; j < 2; j++) {
5477 v->PTEBufferSizeNotExceeded[i][j] = true;
5478 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5479 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5480 v->PTEBufferSizeNotExceeded[i][j] = false;
5486 /*Cursor Support Check*/
5487 v->CursorSupport = true;
5488 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5489 if (v->CursorWidth[k][0] > 0.0) {
5490 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5491 v->CursorSupport = false;
5496 /*Valid Pitch Check*/
5497 v->PitchSupport = true;
5498 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5499 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5500 if (v->DCCEnable[k] == true) {
5501 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5503 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5505 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5506 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5507 && v->SourcePixelFormat[k] != dm_mono_8) {
5508 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5509 if (v->DCCEnable[k] == true) {
5510 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5511 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5512 64.0 * v->Read256BlockWidthC[k]);
5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5517 v->AlignedCPitch[k] = v->PitchC[k];
5518 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5520 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5521 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5522 v->PitchSupport = false;
5526 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5527 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5528 ViewportExceedsSurface = true;
5529 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5530 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5531 && v->SourcePixelFormat[k] != dm_rgbe) {
5532 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5533 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5534 ViewportExceedsSurface = true;
5540 /*Mode Support, Voltage State and SOC Configuration*/
5541 for (i = v->soc.num_states - 1; i >= 0; i--) {
5542 for (j = 0; j < 2; j++) {
5543 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5544 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5545 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5546 && v->DTBCLKRequiredMoreThanSupported[i] == false
5547 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5548 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5549 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5550 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5551 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5552 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5553 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5554 && ((v->HostVMEnable == false
5555 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5556 || v->ImmediateFlipSupportedForState[i][j] == true)
5557 && FMTBufferExceeded == false) {
5558 v->ModeSupport[i][j] = true;
5560 v->ModeSupport[i][j] = false;
5564 for (i = v->soc.num_states; i >= 0; i--) {
5565 for (j = 0; j < 2; j++) {
5566 enum dm_validation_status status = DML_VALIDATION_OK;
5568 if (!v->ScaleRatioAndTapsSupport) {
5569 status = DML_FAIL_SCALE_RATIO_TAP;
5570 } else if (!v->SourceFormatPixelAndScanSupport) {
5571 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5572 } else if (!v->ViewportSizeSupport[i][j]) {
5573 status = DML_FAIL_VIEWPORT_SIZE;
5574 } else if (P2IWith420) {
5575 status = DML_FAIL_P2I_WITH_420;
5576 } else if (DSCOnlyIfNecessaryWithBPP) {
5577 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5578 } else if (DSC422NativeNotSupported) {
5579 status = DML_FAIL_NOT_DSC422_NATIVE;
5580 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5581 status = DML_FAIL_ODM_COMBINE4TO1;
5582 } else if (v->NotEnoughDSCUnits[i]) {
5583 status = DML_FAIL_NOT_ENOUGH_DSC;
5584 } else if (!v->ROBSupport[i][j]) {
5585 status = DML_FAIL_REORDERING_BUFFER;
5586 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5587 status = DML_FAIL_DISPCLK_DPPCLK;
5588 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5589 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5590 } else if (!EnoughWritebackUnits) {
5591 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5592 } else if (!v->WritebackLatencySupport) {
5593 status = DML_FAIL_WRITEBACK_LATENCY;
5594 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5595 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5596 } else if (!v->CursorSupport) {
5597 status = DML_FAIL_CURSOR_SUPPORT;
5598 } else if (!v->PitchSupport) {
5599 status = DML_FAIL_PITCH_SUPPORT;
5600 } else if (ViewportExceedsSurface) {
5601 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5602 } else if (!v->PrefetchSupported[i][j]) {
5603 status = DML_FAIL_PREFETCH_SUPPORT;
5604 } else if (!v->DynamicMetadataSupported[i][j]) {
5605 status = DML_FAIL_DYNAMIC_METADATA;
5606 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5607 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5608 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5609 status = DML_FAIL_V_RATIO_PREFETCH;
5610 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5611 status = DML_FAIL_PTE_BUFFER_SIZE;
5612 } else if (v->NonsupportedDSCInputBPC) {
5613 status = DML_FAIL_DSC_INPUT_BPC;
5614 } else if ((v->HostVMEnable
5615 && !v->ImmediateFlipSupportedForState[i][j])) {
5616 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5617 } else if (FMTBufferExceeded) {
5618 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5620 mode_lib->vba.ValidationStatus[i] = status;
5625 unsigned int MaximumMPCCombine = 0;
5627 for (i = v->soc.num_states; i >= 0; i--) {
5628 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5629 v->VoltageLevel = i;
5630 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5631 if (v->ModeSupport[i][0] == true) {
5632 MaximumMPCCombine = 0;
5634 MaximumMPCCombine = 1;
5638 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5639 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5640 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5641 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5643 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5644 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5645 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5646 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5647 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5648 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport() - compute the urgent, DRAM
 * clock change, writeback and stutter watermarks, and classify whether a DRAM
 * clock change can be hidden.
 *
 * Values stored in mode_lib->vba by the visible statements: UrgentWatermark,
 * DRAMClockChangeWatermark, TotalActiveWriteback, WritebackUrgentWatermark,
 * WritebackDRAMClockChangeWatermark, LBLatencyHidingSourceLinesY/C,
 * ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin,
 * MinActiveDRAMClockChangeLatencySupported and TotalNumberOfActiveOTG.
 *
 * Values returned through pointers:
 *   DRAMClockChangeSupport - dm_dram_clock_change_vactive, _vblank or
 *                            _unsupported
 *   StutterExitWatermark, StutterEnterPlusExitWatermark,
 *   Z8StutterExitWatermark, Z8StutterEnterPlusExitWatermark - the matching
 *                            v->SR*Time + ExtraLatency + 10 / DCFCLKDeepSleep
 *
 * NOTE(review): this listing omits some source lines (brace-only lines, else
 * lines and a few declarations). SOCCLK, k, j and LinesInDETC are used below
 * but their declarations are not among the visible lines. The comments only
 * describe statements that are visible; confirm the branch structure against
 * the complete file.
 */
5652 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5653 struct display_mode_lib *mode_lib,
5654 unsigned int PrefetchMode,
5657 double UrgentLatency,
5658 double ExtraLatency,
5660 double DCFCLKDeepSleep,
5661 unsigned int DETBufferSizeY[],
5662 unsigned int DETBufferSizeC[],
5663 unsigned int SwathHeightY[],
5664 unsigned int SwathHeightC[],
5665 double SwathWidthY[],
5666 double SwathWidthC[],
5667 unsigned int DPPPerPlane[],
5668 double BytePerPixelDETY[],
5669 double BytePerPixelDETC[],
5670 bool UnboundedRequestEnabled,
5671 unsigned int CompressedBufferSizeInkByte,
5672 enum clock_change_support *DRAMClockChangeSupport,
5673 double *StutterExitWatermark,
5674 double *StutterEnterPlusExitWatermark,
5675 double *Z8StutterExitWatermark,
5676 double *Z8StutterEnterPlusExitWatermark)
5678 struct vba_vars_st *v = &mode_lib->vba;
5679 double EffectiveLBLatencyHidingY;
5680 double EffectiveLBLatencyHidingC;
5681 double LinesInDETY[DC__NUM_DPP__MAX];
5683 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5684 unsigned int LinesInDETCRoundedDownToSwath;
5685 double FullDETBufferingTimeY;
5686 double FullDETBufferingTimeC;
5687 double ActiveDRAMClockChangeLatencyMarginY;
5688 double ActiveDRAMClockChangeLatencyMarginC;
5689 double WritebackDRAMClockChangeLatencyMargin;
5690 double PlaneWithMinActiveDRAMClockChangeMargin;
5691 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5692 double WritebackDRAMClockChangeLatencyHiding;
5693 double TotalPixelBW = 0.0;
// The urgent watermark is the sum of the two latency inputs.
5696 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5698 #ifdef __DML_VBA_DEBUG__
5699 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5700 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5701 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
// The DRAM clock change watermark stacks the clock change latency on top of the urgent watermark.
5704 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5706 #ifdef __DML_VBA_DEBUG__
5707 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5708 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
// Count the planes that have writeback enabled.
5711 v->TotalActiveWriteback = 0;
5712 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5713 if (v->WritebackEnable[k] == true) {
5714 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
// Writeback watermarks: the second assignment of each pair adds a
// WritebackChunkSize * 1024 / 32 / SOCCLK term to the first form.
5718 if (v->TotalActiveWriteback <= 1) {
5719 v->WritebackUrgentWatermark = v->WritebackLatency;
5721 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5724 if (v->TotalActiveWriteback <= 1) {
5725 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5727 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
// Sum over all planes of (luma + chroma bytes per source line, scaled by the
// vertical ratios) times DPPPerPlane, divided by the line time HTotal / PixelClock.
5730 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5731 TotalPixelBW = TotalPixelBW
5732 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5733 / (v->HTotal[k] / v->PixelClock[k]);
// Per-plane DRAM clock change latency margin.
5736 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5737 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
// Source lines held by the line buffer: capped at MaxLineBufferLines, minus
// the lines consumed by the vertical taps (taps - 1).
5739 v->LBLatencyHidingSourceLinesY = dml_min(
5740 (double) v->MaxLineBufferLines,
5741 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5743 v->LBLatencyHidingSourceLinesC = dml_min(
5744 (double) v->MaxLineBufferLines,
5745 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
// Convert those line counts into time using the vertical ratio and the line time.
5747 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5749 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
// With unbounded requests, the luma DET size is extended by this plane's
// share (its luma bandwidth over TotalPixelBW) of the compressed buffer.
5751 if (UnboundedRequestEnabled) {
5752 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5753 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
// Lines that fit in the DET, rounded down to whole swaths, expressed as time.
5756 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5757 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5758 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5759 if (BytePerPixelDETC[k] > 0) {
// NOTE(review): chroma reads v->DETBufferSizeC[k], whereas luma uses the
// DETBufferSizeY[] parameter; the DETBufferSizeC[] parameter is not
// referenced in the visible lines - confirm this is intended.
5760 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5761 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5762 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
// Alternative assignment: a large sentinel value for the chroma buffering time.
5765 FullDETBufferingTimeC = 999999;
// Margin = line buffer hiding + DET buffering time - time taken by the
// DSTX/DSTY "after scaler" offsets - urgent watermark - DRAM clock change watermark.
5768 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5769 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
// With several active planes, subtract (1 - 1/N) of one swath height worth of time.
5771 if (v->NumberOfActivePlanes > 1) {
5772 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5773 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
// Same computation for chroma; the plane margin is then the smaller of the two.
5776 if (BytePerPixelDETC[k] > 0) {
5777 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5778 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5780 if (v->NumberOfActivePlanes > 1) {
5781 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5782 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5784 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5786 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
// Planes with writeback: the margin is additionally limited by how long the
// writeback interface buffer can hide the clock change.
5789 if (v->WritebackEnable[k] == true) {
5790 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5791 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
// The hiding time is halved for the dm_444_64 writeback format.
5792 if (v->WritebackPixelFormat[k] == dm_444_64) {
5793 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5795 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5796 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
// Find the smallest per-plane margin and record the plane index that
// BlendingAndTiming[] associates with it (999999 is the starting sentinel).
5800 v->MinActiveDRAMClockChangeMargin = 999999;
5801 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5802 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5803 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5804 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5805 if (v->BlendingAndTiming[k] == k) {
5806 PlaneWithMinActiveDRAMClockChangeMargin = k;
5808 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5809 if (v->BlendingAndTiming[k] == j) {
5810 PlaneWithMinActiveDRAMClockChangeMargin = j;
5817 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
// Smallest margin among the planes that are neither the plane found above
// nor blended onto its timing.
5819 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5820 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5821 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5822 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5823 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
// Count the planes whose BlendingAndTiming entry points at themselves.
5827 v->TotalNumberOfActiveOTG = 0;
5829 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5830 if (v->BlendingAndTiming[k] == k) {
5831 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
// Classification (PrefetchMode must be 0 for either supported result):
// positive minimum margin -> vactive; synchronized vblank, a single OTG or a
// positive second-minimum margin -> vblank; the last assignment is unsupported.
5835 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5836 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5837 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5838 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5839 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5841 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
// Stutter watermarks: self-refresh time + ExtraLatency + 10 / DCFCLKDeepSleep.
5844 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5845 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5846 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5847 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5849 #ifdef __DML_VBA_DEBUG__
5850 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5851 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5852 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5853 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep() - compute the deep sleep DCFCLK value.
 *
 * Fills mode_lib->vba.DCFCLKDeepSleepPerPlane[] for every active plane and
 * writes *DCFCLKDeepSleep as the largest of: 8.0,
 * __DML_MIN_DCFCLK_FACTOR__ * (sum of luma and chroma read bandwidth) /
 * ReturnBusWidth, and every per-plane value.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK, ReturnBusWidth and k are used below
 * but their declarations are not among the visible lines of this listing;
 * brace-only and else lines are likewise not visible.
 */
5857 static void CalculateDCFCLKDeepSleep(
5858 struct display_mode_lib *mode_lib,
5859 unsigned int NumberOfActivePlanes,
5860 int BytePerPixelY[],
5861 int BytePerPixelC[],
5863 double VRatioChroma[],
5864 double SwathWidthY[],
5865 double SwathWidthC[],
5866 unsigned int DPPPerPlane[],
5868 double HRatioChroma[],
5869 double PixelClock[],
5870 double PSCL_THROUGHPUT[],
5871 double PSCL_THROUGHPUT_CHROMA[],
5873 double ReadBandwidthLuma[],
5874 double ReadBandwidthChroma[],
5876 double *DCFCLKDeepSleep)
5878 struct vba_vars_st *v = &mode_lib->vba;
5879 double DisplayPipeLineDeliveryTimeLuma;
5880 double DisplayPipeLineDeliveryTimeChroma;
5881 double ReadBandwidth = 0.0;
5884 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Luma line delivery time: pixel-clock based form when VRatio <= 1,
// scaler-throughput / DPPCLK based form as the alternative.
5886 if (VRatio[k] <= 1) {
5887 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5889 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
// Chroma line delivery time: zero when the plane has no chroma bytes,
// otherwise the same two forms with the chroma quantities.
5891 if (BytePerPixelC[k] == 0) {
5892 DisplayPipeLineDeliveryTimeChroma = 0;
5894 if (VRatioChroma[k] <= 1) {
5895 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5897 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Per-plane value: with chroma, the larger of the luma and chroma rates
// (divisor 32.0); the luma-only form uses divisor 64.0.
5901 if (BytePerPixelC[k] > 0) {
5902 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5903 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5905 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
// Never go below PixelClock / 16 for the plane.
5907 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
// Total read bandwidth across all active planes.
5911 for (k = 0; k < NumberOfActivePlanes; ++k) {
5912 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
// Start from the bandwidth-derived value with a floor of 8.0 ...
5915 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
// ... then raise it to the largest per-plane requirement.
5917 for (k = 0; k < NumberOfActivePlanes; ++k) {
5918 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor() - compute urgent burst factors for the cursor,
 * luma and chroma buffers of one plane.
 *
 * For each buffer the visible code converts the number of buffered lines into
 * a time T and then either
 *   - sets *NotEnoughUrgentLatencyHiding to 1 and the factor to 0 when
 *     T - UrgentLatency <= 0, or
 *   - sets the factor to T / (T - UrgentLatency).
 * A third assignment sets each factor to 1; the condition selecting it is on
 * a line that is not visible in this listing.
 *
 * Outputs: *UrgentBurstFactorCursor, *UrgentBurstFactorLuma,
 * *UrgentBurstFactorChroma and *NotEnoughUrgentLatencyHiding (cleared to 0 on
 * entry).
 *
 * NOTE(review): LineTime and VRatio are used below but their declarations are
 * not among the visible lines; brace-only and else lines are not visible
 * either. Confirm the branch structure against the complete file.
 */
5922 static void CalculateUrgentBurstFactor(
5923 int swath_width_luma_ub,
5924 int swath_width_chroma_ub,
5925 unsigned int SwathHeightY,
5926 unsigned int SwathHeightC,
5928 double UrgentLatency,
5929 double CursorBufferSize,
5930 unsigned int CursorWidth,
5931 unsigned int CursorBPP,
5934 double BytePerPixelInDETY,
5935 double BytePerPixelInDETC,
5936 double DETBufferSizeY,
5937 double DETBufferSizeC,
5938 double *UrgentBurstFactorCursor,
5939 double *UrgentBurstFactorLuma,
5940 double *UrgentBurstFactorChroma,
5941 bool *NotEnoughUrgentLatencyHiding)
5943 double LinesInDETLuma;
5944 double LinesInDETChroma;
5945 unsigned int LinesInCursorBuffer;
5946 double CursorBufferSizeInTime;
5947 double DETBufferSizeInTimeLuma;
5948 double DETBufferSizeInTimeChroma;
5950 *NotEnoughUrgentLatencyHiding = 0;
5952 if (CursorWidth > 0) {
// Cursor lines that fit in the buffer, rounded down to a power of two:
// 1 << floor(log2(buffer bytes / bytes per cursor line)).
5953 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5955 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5956 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5957 *NotEnoughUrgentLatencyHiding = 1;
5958 *UrgentBurstFactorCursor = 0;
5960 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
// Fallback factor of 1 (guarding condition not visible in this listing).
5963 *UrgentBurstFactorCursor = 1;
// Luma: lines in the DET, rounded down to whole swaths, converted to time.
5967 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5969 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5970 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5971 *NotEnoughUrgentLatencyHiding = 1;
5972 *UrgentBurstFactorLuma = 0;
5974 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
// Fallback factor of 1 (guarding condition not visible in this listing).
5977 *UrgentBurstFactorLuma = 1;
// Chroma is only evaluated when the plane has chroma bytes in the DET.
5980 if (BytePerPixelInDETC > 0) {
5981 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
// NOTE(review): the chroma time below is divided by VRatio, the same
// identifier the luma path uses - confirm against the complete file.
5983 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5984 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5985 *NotEnoughUrgentLatencyHiding = 1;
5986 *UrgentBurstFactorChroma = 0;
5988 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
// Fallback factor of 1 (guarding condition not visible in this listing).
5991 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes() - compute per-plane line, request and cursor
 * delivery times for active display and for prefetch.
 *
 * Three passes over the active planes:
 *   1. DisplayPipeLineDeliveryTime{Luma,Chroma}[] and their *Prefetch[]
 *      counterparts (the prefetch versions test VRatioPrefetchY/C instead of
 *      VRatio/VRatioChroma).
 *   2. DisplayPipeRequestDeliveryTime*[] = line delivery time divided by the
 *      number of requests per swath.
 *   3. CursorRequestDeliveryTime[] and CursorRequestDeliveryTimePrefetch[].
 *
 * All results are written into the caller-provided output arrays.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK and k are used below but their
 * declarations are not among the visible lines of this listing; brace-only
 * and else lines are not visible either.
 */
5996 static void CalculatePixelDeliveryTimes(
5997 unsigned int NumberOfActivePlanes,
5999 double VRatioChroma[],
6000 double VRatioPrefetchY[],
6001 double VRatioPrefetchC[],
6002 unsigned int swath_width_luma_ub[],
6003 unsigned int swath_width_chroma_ub[],
6004 unsigned int DPPPerPlane[],
6006 double HRatioChroma[],
6007 double PixelClock[],
6008 double PSCL_THROUGHPUT[],
6009 double PSCL_THROUGHPUT_CHROMA[],
6011 int BytePerPixelC[],
6012 enum scan_direction_class SourceScan[],
6013 unsigned int NumberOfCursors[],
6014 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6015 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6016 unsigned int BlockWidth256BytesY[],
6017 unsigned int BlockHeight256BytesY[],
6018 unsigned int BlockWidth256BytesC[],
6019 unsigned int BlockHeight256BytesC[],
6020 double DisplayPipeLineDeliveryTimeLuma[],
6021 double DisplayPipeLineDeliveryTimeChroma[],
6022 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6023 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6024 double DisplayPipeRequestDeliveryTimeLuma[],
6025 double DisplayPipeRequestDeliveryTimeChroma[],
6026 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6027 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6028 double CursorRequestDeliveryTime[],
6029 double CursorRequestDeliveryTimePrefetch[])
6031 double req_per_swath_ub;
// Pass 1: line delivery times. Each quantity has a pixel-clock based form
// (ratio <= 1) and a scaler-throughput / DPPCLK based alternative.
6034 for (k = 0; k < NumberOfActivePlanes; ++k) {
6035 if (VRatio[k] <= 1) {
6036 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6038 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
// Chroma is zero for planes without chroma bytes.
6041 if (BytePerPixelC[k] == 0) {
6042 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6044 if (VRatioChroma[k] <= 1) {
6045 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6047 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Prefetch variants: same formulas, selected by the prefetch vertical ratios.
6051 if (VRatioPrefetchY[k] <= 1) {
6052 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6054 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6057 if (BytePerPixelC[k] == 0) {
6058 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6060 if (VRatioPrefetchC[k] <= 1) {
6061 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6063 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Pass 2: request delivery times. Requests per swath use the 256-byte block
// width for non-vertical scan and the block height as the alternative.
// NOTE(review): both operands of these divisions are unsigned int, so the
// quotient is truncated before it is stored in the double req_per_swath_ub.
6068 for (k = 0; k < NumberOfActivePlanes; ++k) {
6069 if (SourceScan[k] != dm_vert) {
6070 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6072 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6074 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6075 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6076 if (BytePerPixelC[k] == 0) {
6077 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6078 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6080 if (SourceScan[k] != dm_vert) {
6081 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6083 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6085 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6086 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6088 #ifdef __DML_VBA_DEBUG__
6089 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6090 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6091 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6092 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6093 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6094 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6095 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6096 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6097 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6098 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6099 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6100 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
// Pass 3: cursor request delivery times; only cursor index 0 is consulted.
6104 for (k = 0; k < NumberOfActivePlanes; ++k) {
6105 int cursor_req_per_width;
// NOTE(review): CursorWidth and CursorBPP are unsigned int, so
// "/ 256 / 8" truncates before dml_ceil() sees the value; a product below
// 2048 gives 0, which is then used as a divisor below. This is also
// evaluated before the NumberOfCursors[k] > 0 check. Confirm this is intended.
6107 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6108 if (NumberOfCursors[k] > 0) {
6109 if (VRatio[k] <= 1) {
6110 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6112 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6114 if (VRatioPrefetchY[k] <= 1) {
6115 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6117 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
// Alternative assignments: both cursor times are zero.
6120 CursorRequestDeliveryTime[k] = 0;
6121 CursorRequestDeliveryTimePrefetch[k] = 0;
6123 #ifdef __DML_VBA_DEBUG__
6124 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6125 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6126 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes() - compute per-plane timing for DCC meta chunks
 * and for page table entry (PTE) groups.
 *
 * Three passes over the active planes:
 *   1. DST_Y_PER_PTE_ROW_NOM_{L,C}[] and DST_Y_PER_META_ROW_NOM_{L,C}[]:
 *      row heights divided by the vertical ratio (chroma entries are 0 when
 *      BytePerPixelC[k] == 0).
 *   2. TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[]: computed when
 *      DCCEnable[k] is true; a group of assignments sets all six to 0.
 *   3. time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[]: computed when
 *      GPUVMEnable is true; a group of assignments sets all six to 0.
 *
 * All results are written into the caller-provided output arrays.
 *
 * NOTE(review): GPUVMEnable, MetaChunkSize, HTotal, VRatio, DCCEnable and k
 * are used below but their declarations are not among the visible lines of
 * this listing; brace-only and else lines are not visible either.
 */
6131 static void CalculateMetaAndPTETimes(
6132 int NumberOfActivePlanes,
6135 int MinMetaChunkSizeBytes,
6138 double VRatioChroma[],
6139 double DestinationLinesToRequestRowInVBlank[],
6140 double DestinationLinesToRequestRowInImmediateFlip[],
6142 double PixelClock[],
6143 int BytePerPixelY[],
6144 int BytePerPixelC[],
6145 enum scan_direction_class SourceScan[],
6146 int dpte_row_height[],
6147 int dpte_row_height_chroma[],
6148 int meta_row_width[],
6149 int meta_row_width_chroma[],
6150 int meta_row_height[],
6151 int meta_row_height_chroma[],
6152 int meta_req_width[],
6153 int meta_req_width_chroma[],
6154 int meta_req_height[],
6155 int meta_req_height_chroma[],
6156 int dpte_group_bytes[],
6157 int PTERequestSizeY[],
6158 int PTERequestSizeC[],
6159 int PixelPTEReqWidthY[],
6160 int PixelPTEReqHeightY[],
6161 int PixelPTEReqWidthC[],
6162 int PixelPTEReqHeightC[],
6163 int dpte_row_width_luma_ub[],
6164 int dpte_row_width_chroma_ub[],
6165 double DST_Y_PER_PTE_ROW_NOM_L[],
6166 double DST_Y_PER_PTE_ROW_NOM_C[],
6167 double DST_Y_PER_META_ROW_NOM_L[],
6168 double DST_Y_PER_META_ROW_NOM_C[],
6169 double TimePerMetaChunkNominal[],
6170 double TimePerChromaMetaChunkNominal[],
6171 double TimePerMetaChunkVBlank[],
6172 double TimePerChromaMetaChunkVBlank[],
6173 double TimePerMetaChunkFlip[],
6174 double TimePerChromaMetaChunkFlip[],
6175 double time_per_pte_group_nom_luma[],
6176 double time_per_pte_group_vblank_luma[],
6177 double time_per_pte_group_flip_luma[],
6178 double time_per_pte_group_nom_chroma[],
6179 double time_per_pte_group_vblank_chroma[],
6180 double time_per_pte_group_flip_chroma[])
6182 unsigned int meta_chunk_width;
6183 unsigned int min_meta_chunk_width;
6184 unsigned int meta_chunk_per_row_int;
6185 unsigned int meta_row_remainder;
6186 unsigned int meta_chunk_threshold;
6187 unsigned int meta_chunks_per_row_ub;
6188 unsigned int meta_chunk_width_chroma;
6189 unsigned int min_meta_chunk_width_chroma;
6190 unsigned int meta_chunk_per_row_int_chroma;
6191 unsigned int meta_row_remainder_chroma;
6192 unsigned int meta_chunk_threshold_chroma;
6193 unsigned int meta_chunks_per_row_ub_chroma;
6194 unsigned int dpte_group_width_luma;
6195 unsigned int dpte_groups_per_row_luma_ub;
6196 unsigned int dpte_group_width_chroma;
6197 unsigned int dpte_groups_per_row_chroma_ub;
// Pass 1: destination lines per PTE row and per meta row.
6200 for (k = 0; k < NumberOfActivePlanes; ++k) {
6201 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6202 if (BytePerPixelC[k] == 0) {
6203 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6205 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6207 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6208 if (BytePerPixelC[k] == 0) {
6209 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6211 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
// Pass 2: time per DCC meta chunk.
6215 for (k = 0; k < NumberOfActivePlanes; ++k) {
6216 if (DCCEnable[k] == true) {
// Chunk widths derived from the chunk size, bytes per pixel and meta row height.
6217 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6218 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
// Whole chunks per meta row and the leftover width.
6219 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6220 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
// Threshold uses the meta request width for non-vertical scan and the
// request height as the alternative.
// NOTE(review): the threshold is unsigned, so the subtraction wraps if the
// request dimension exceeds 2 * min_meta_chunk_width - confirm inputs.
6221 if (SourceScan[k] != dm_vert) {
6222 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6224 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
// Upper bound on chunks per row: +1 when the remainder is within the
// threshold, +2 as the alternative.
6226 if (meta_row_remainder <= meta_chunk_threshold) {
6227 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6229 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
// Time available per row (nominal / vblank / flip), in line times converted
// through HTotal / PixelClock, split evenly across the chunks.
6231 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6232 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6233 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6234 if (BytePerPixelC[k] == 0) {
6235 TimePerChromaMetaChunkNominal[k] = 0;
6236 TimePerChromaMetaChunkVBlank[k] = 0;
6237 TimePerChromaMetaChunkFlip[k] = 0;
// Chroma repeats the luma computation with the chroma quantities.
6239 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6240 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
// NOTE(review): unlike the luma path this divides in double and then
// truncates on assignment to the unsigned int.
6241 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6242 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6243 if (SourceScan[k] != dm_vert) {
6244 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6246 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6248 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6249 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6251 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6253 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6254 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6255 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
// Alternative assignments: all meta chunk times are zero.
6258 TimePerMetaChunkNominal[k] = 0;
6259 TimePerMetaChunkVBlank[k] = 0;
6260 TimePerMetaChunkFlip[k] = 0;
6261 TimePerChromaMetaChunkNominal[k] = 0;
6262 TimePerChromaMetaChunkVBlank[k] = 0;
6263 TimePerChromaMetaChunkFlip[k] = 0;
// Pass 3: time per PTE group.
6267 for (k = 0; k < NumberOfActivePlanes; ++k) {
6268 if (GPUVMEnable == true) {
// Group width = PTE requests per group times the PTE request width
// (non-vertical scan) or height (alternative).
6269 if (SourceScan[k] != dm_vert) {
6270 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6272 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
// Groups per row, rounded up, then the row time split across the groups.
6274 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6275 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6276 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6277 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6278 if (BytePerPixelC[k] == 0) {
6279 time_per_pte_group_nom_chroma[k] = 0;
6280 time_per_pte_group_vblank_chroma[k] = 0;
6281 time_per_pte_group_flip_chroma[k] = 0;
6283 if (SourceScan[k] != dm_vert) {
6284 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6286 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6288 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6289 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6290 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6291 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
// Alternative assignments: all PTE group times are zero.
6294 time_per_pte_group_nom_luma[k] = 0;
6295 time_per_pte_group_vblank_luma[k] = 0;
6296 time_per_pte_group_flip_luma[k] = 0;
6297 time_per_pte_group_nom_chroma[k] = 0;
6298 time_per_pte_group_vblank_chroma[k] = 0;
6299 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes() - compute per-plane time budgets for VM
 * groups and VM requests during vblank and immediate flip.
 *
 * For planes where GPUVMEnable is true and either DCCEnable[k] is true or
 * GPUVMMaxPageTableLevels > 1, the function counts the groups
 * (bytes / vm_group_bytes, rounded up) and the 64-byte requests in the lower
 * VM stage, then divides the vblank / flip line budgets (converted to time
 * through HTotal / PixelClock) by those counts. All four results are halved
 * when GPUVMMaxPageTableLevels > 2. A final group of assignments sets all
 * four outputs to 0.
 *
 * Outputs: TimePerVMGroupVBlank[], TimePerVMGroupFlip[],
 * TimePerVMRequestVBlank[], TimePerVMRequestFlip[].
 *
 * NOTE(review): GPUVMEnable, DCCEnable and k are used below but their
 * declarations are not among the visible lines of this listing; brace-only
 * and else lines are not visible either, so which byte counts feed each
 * alternative should be confirmed against the complete file.
 */
6304 static void CalculateVMGroupAndRequestTimes(
6305 unsigned int NumberOfActivePlanes,
6307 unsigned int GPUVMMaxPageTableLevels,
6308 unsigned int HTotal[],
6309 int BytePerPixelC[],
6310 double DestinationLinesToRequestVMInVBlank[],
6311 double DestinationLinesToRequestVMInImmediateFlip[],
6313 double PixelClock[],
6314 int dpte_row_width_luma_ub[],
6315 int dpte_row_width_chroma_ub[],
6316 int vm_group_bytes[],
6317 unsigned int dpde0_bytes_per_frame_ub_l[],
6318 unsigned int dpde0_bytes_per_frame_ub_c[],
6319 int meta_pte_bytes_per_frame_ub_l[],
6320 int meta_pte_bytes_per_frame_ub_c[],
6321 double TimePerVMGroupVBlank[],
6322 double TimePerVMGroupFlip[],
6323 double TimePerVMRequestVBlank[],
6324 double TimePerVMRequestFlip[])
6326 int num_group_per_lower_vm_stage;
6327 int num_req_per_lower_vm_stage;
6330 for (k = 0; k < NumberOfActivePlanes; ++k) {
6331 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
// Group count, DCC disabled: only the dpde0 bytes are counted
// (luma plus chroma when the plane has chroma bytes).
6332 if (DCCEnable[k] == false) {
6333 if (BytePerPixelC[k] > 0) {
6334 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6335 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6337 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Group count with a single page table level: only the meta PTE bytes are counted.
6340 if (GPUVMMaxPageTableLevels == 1) {
6341 if (BytePerPixelC[k] > 0) {
6342 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6343 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6345 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Remaining alternative: dpde0 and meta PTE groups are both counted, plus a
// constant (2 with chroma, 1 without).
6348 if (BytePerPixelC[k] > 0) {
6349 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6350 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6351 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6352 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6354 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6355 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Request count: the same three cases, counting 64-byte requests
// (integer division by 64).
6360 if (DCCEnable[k] == false) {
6361 if (BytePerPixelC[k] > 0) {
6362 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6364 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6367 if (GPUVMMaxPageTableLevels == 1) {
6368 if (BytePerPixelC[k] > 0) {
6369 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6371 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6374 if (BytePerPixelC[k] > 0) {
6375 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6376 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6378 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
// Split the vblank / flip time budgets evenly over the groups and requests.
// NOTE(review): both counts are used as divisors with no visible guard
// against a value of 0 - confirm the inputs rule that out.
6383 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6384 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6385 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6386 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
// More than two page table levels: every budget is halved.
6388 if (GPUVMMaxPageTableLevels > 2) {
6389 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6390 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6391 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6392 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
// Alternative assignments: all four times are zero.
6396 TimePerVMGroupVBlank[k] = 0;
6397 TimePerVMGroupFlip[k] = 0;
6398 TimePerVMRequestVBlank[k] = 0;
6399 TimePerVMRequestFlip[k] = 0;
// CalculateStutterEfficiency: finds the plane whose DET buffering time is the
// shortest (the "critical plane"), uses that time as the stutter period, and
// derives the stutter burst time and the stutter efficiencies from it.
//
// Values written through the pointer parameters:
//   *StutterPeriod                          - smallest per-plane DETBufferingTimeY
//   *StutterEfficiencyNotIncludingVBlank    - max(0, 1 - (SRExitTime + burst) / period) * 100
//   *Z8StutterEfficiencyNotIncludingVBlank  - same formula with SRExitZ8Time
//   *NumberOfStutterBurstsPerFrame          - ceil(VActive time / period), or 0
//   *Z8NumberOfStutterBurstsPerFrame        - same, gated on the Z8 efficiency
//   *StutterEfficiency / *Z8StutterEfficiency - variants that may account for
//                                             the whole frame time (see below)
6404 static void CalculateStutterEfficiency(
6405 struct display_mode_lib *mode_lib,
6406 int CompressedBufferSizeInkByte,
6407 bool UnboundedRequestEnabled,
6408 int ConfigReturnBufferSizeInKByte,
6409 int MetaFIFOSizeInKEntries,
6410 int ZeroSizeBufferEntries,
6411 int NumberOfActivePlanes,
6412 int ROBBufferSizeInKByte,
6413 double TotalDataReadBandwidth,
6416 double COMPBUF_RESERVED_SPACE_64B,
6417 double COMPBUF_RESERVED_SPACE_ZS,
6419 double SRExitZ8Time,
6420 bool SynchronizedVBlank,
6421 double Z8StutterEnterPlusExitWatermark,
6422 double StutterEnterPlusExitWatermark,
6423 bool ProgressiveToInterlaceUnitInOPP,
6425 double MinTTUVBlank[],
6427 unsigned int DETBufferSizeY[],
6428 int BytePerPixelY[],
6429 double BytePerPixelDETY[],
6430 double SwathWidthY[],
6433 double NetDCCRateLuma[],
6434 double NetDCCRateChroma[],
6435 double DCCFractionOfZeroSizeRequestsLuma[],
6436 double DCCFractionOfZeroSizeRequestsChroma[],
6439 double PixelClock[],
6441 enum scan_direction_class SourceScan[],
6442 int BlockHeight256BytesY[],
6443 int BlockWidth256BytesY[],
6444 int BlockHeight256BytesC[],
6445 int BlockWidth256BytesC[],
6446 int DCCYMaxUncompressedBlock[],
6447 int DCCCMaxUncompressedBlock[],
6450 bool WritebackEnable[],
6451 double ReadBandwidthPlaneLuma[],
6452 double ReadBandwidthPlaneChroma[],
6453 double meta_row_bw[],
6454 double dpte_row_bw[],
6455 double *StutterEfficiencyNotIncludingVBlank,
6456 double *StutterEfficiency,
6457 int *NumberOfStutterBurstsPerFrame,
6458 double *Z8StutterEfficiencyNotIncludingVBlank,
6459 double *Z8StutterEfficiency,
6460 int *Z8NumberOfStutterBurstsPerFrame,
6461 double *StutterPeriod)
6463 struct vba_vars_st *v = &mode_lib->vba;
6465 double DETBufferingTimeY;
// "CriticalPlane" locals hold values copied from the plane that currently
// defines *StutterPeriod.
6466 double SwathWidthYCriticalPlane = 0;
6467 double VActiveTimeCriticalPlane = 0;
6468 double FrameTimeCriticalPlane = 0;
6469 int BytePerPixelYCriticalPlane = 0;
6470 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6471 double MinTTUVBlankCriticalPlane = 0;
6472 double TotalCompressedReadBandwidth;
6473 double TotalRowReadBandwidth;
6474 double AverageDCCCompressionRate;
6475 double EffectiveCompressedBufferSize;
6476 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6477 double StutterBurstTime;
6478 int TotalActiveWriteback;
6480 double LinesInDETYRoundedDownToSwath;
6481 double MaximumEffectiveCompressionLuma;
6482 double MaximumEffectiveCompressionChroma;
6483 double TotalZeroSizeRequestReadBandwidth;
6484 double TotalZeroSizeCompressedReadBandwidth;
6485 double AverageDCCZeroSizeFraction;
6486 double AverageZeroSizeCompressionRate;
6487 int TotalNumberOfActiveOTG = 0;
6488 double LastStutterPeriod = 0.0;
6489 double LastZ8StutterPeriod = 0.0;
// Step 1: accumulate per-plane read bandwidth totals (compressed data,
// zero-size requests, and meta/dpte row traffic).
6492 TotalZeroSizeRequestReadBandwidth = 0;
6493 TotalZeroSizeCompressedReadBandwidth = 0;
6494 TotalRowReadBandwidth = 0;
6495 TotalCompressedReadBandwidth = 0;
6497 for (k = 0; k < NumberOfActivePlanes; ++k) {
6498 if (DCCEnable[k] == true) {
// Luma compression cap: the value 2 is tied to the condition below (256-byte
// block width for dm_vert scan, block height otherwise, larger than the swath
// height, or DCCYMaxUncompressedBlock below 256); the alternative value is 4.
6499 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6500 || DCCYMaxUncompressedBlock[k] < 256) {
6501 MaximumEffectiveCompressionLuma = 2;
6503 MaximumEffectiveCompressionLuma = 4;
// Luma bandwidth is divided by the smaller of the net DCC rate and the cap.
6505 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6506 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6507 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6508 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
// Same accounting for chroma, only when the plane has chroma bandwidth.
6509 if (ReadBandwidthPlaneChroma[k] > 0) {
6510 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6511 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6512 MaximumEffectiveCompressionChroma = 2;
6514 MaximumEffectiveCompressionChroma = 4;
6516 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6517 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6518 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6519 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6520 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
// Luma and chroma bandwidth added with no compression scaling (presumably the
// branch taken when DCC is not enabled for the plane).
6523 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6525 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
// Step 2: bandwidth-weighted averages over all planes.
6528 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6529 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
// NOTE(review): MaximumEffectiveCompressionLuma/Chroma are declared without
// initializers and are only assigned in the DCC path above; the debug prints
// below may read them unset when no plane takes that path - confirm.
6531 #ifdef __DML_VBA_DEBUG__
6532 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6533 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6534 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6535 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6536 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6537 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6538 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6539 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
// Step 3: effective compressed buffer size. Three formulas, selected by the
// share of zero-size DCC requests: all of them (== 1), some of them (> 0),
// and the remaining case.
6542 if (AverageDCCZeroSizeFraction == 1) {
6543 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6544 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6545 } else if (AverageDCCZeroSizeFraction > 0) {
6546 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6547 EffectiveCompressedBufferSize = dml_min(
6548 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6549 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6550 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6551 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6552 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6554 "DML::%s: min 2 = %f\n",
6556 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6557 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6558 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6560 EffectiveCompressedBufferSize = dml_min(
6561 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6562 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6563 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6564 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
// NOTE(review): AverageZeroSizeCompressionRate is only assigned in the two
// zero-size branches above, yet it is printed here - confirm it is always set.
6567 #ifdef __DML_VBA_DEBUG__
6568 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6569 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6570 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
// Step 4: per plane, convert the DET luma buffer (plus, with unbounded
// requests enabled, this plane's bandwidth share of the effective compressed
// buffer) into lines, round down to whole swaths, and convert to a time.
6574 for (k = 0; k < NumberOfActivePlanes; ++k) {
6575 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6576 / BytePerPixelDETY[k] / SwathWidthY[k];
6577 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6578 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
// NOTE(review): DETBufferSizeY[] is declared unsigned int in the parameter
// list but is printed with %f in the first debug print below - confirm the
// format specifier.
6579 #ifdef __DML_VBA_DEBUG__
6580 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6581 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6582 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6583 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6584 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6585 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6586 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6587 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6588 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6589 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6590 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6591 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
// The first plane, or any plane with a shorter buffering time than the current
// period, becomes the critical plane. For interlaced timing not handled in OPP
// the VTotal/VActive line counts are halved and floored.
6594 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6595 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6597 *StutterPeriod = DETBufferingTimeY;
6598 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6599 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6600 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6601 SwathWidthYCriticalPlane = SwathWidthY[k];
6602 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6603 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6605 #ifdef __DML_VBA_DEBUG__
6606 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6607 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6608 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6609 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6610 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6611 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6612 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
// Step 5: the data read during one stutter period, capped at the effective
// compressed buffer size.
6617 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6618 #ifdef __DML_VBA_DEBUG__
6619 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6620 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6621 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6622 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6623 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6624 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6625 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6626 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6627 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6628 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
// Step 6: burst time is the sum of three parts:
//   1. the capped part, divided by the average DCC rate, at ReturnBW;
//   2. the rest of the period's data at DCFCLK * 64;
//   3. the period's row traffic at ReturnBW.
6631 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6632 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6633 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6634 #ifdef __DML_VBA_DEBUG__
6635 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6636 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6637 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6638 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6639 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
// The burst is at least as long as the time needed to fetch the critical
// plane's residual swath lines at ReturnBW.
6641 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6644 "DML::%s: Time to finish residue swath=%f\n",
6646 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
// Step 7: count planes with writeback enabled.
6648 TotalActiveWriteback = 0;
6649 for (k = 0; k < NumberOfActivePlanes; ++k) {
6650 if (WritebackEnable[k]) {
6651 TotalActiveWriteback = TotalActiveWriteback + 1;
// With no active writeback the efficiencies (excluding vblank) and burst
// counts are computed; the plain zero assignments further down presumably
// form the branch taken when writeback is active.
6655 if (TotalActiveWriteback == 0) {
6656 #ifdef __DML_VBA_DEBUG__
6657 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6658 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6659 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6660 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6662 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6663 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6664 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6665 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6667 *StutterEfficiencyNotIncludingVBlank = 0.;
6668 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6669 *NumberOfStutterBurstsPerFrame = 0;
6670 *Z8NumberOfStutterBurstsPerFrame = 0;
6672 #ifdef __DML_VBA_DEBUG__
6673 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6674 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6675 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6676 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6677 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
// Step 8: count planes whose BlendingAndTiming entry refers to themselves.
6680 for (k = 0; k < NumberOfActivePlanes; ++k) {
6681 if (v->BlendingAndTiming[k] == k) {
6682 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
// Step 9: efficiency over the whole frame. The frame-time formula is used only
// when vblanks are synchronized or there is a single OTG, and the last stutter
// period plus the critical plane's MinTTUVBlank exceeds the enter+exit
// watermark. The two plain assignments that follow it copy the vblank-excluded
// value and set 0 respectively.
6686 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6687 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6689 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6690 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6691 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6693 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6696 *StutterEfficiency = 0;
// Z8 variant of the same computation, using SRExitZ8Time and the Z8 watermark.
// NOTE(review): this path reads *NumberOfStutterBurstsPerFrame rather than
// *Z8NumberOfStutterBurstsPerFrame - confirm that is intended.
6699 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6700 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6701 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6702 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6703 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6705 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6708 *Z8StutterEfficiency = 0.;
6711 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6712 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6713 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6714 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6715 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6716 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6717 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6718 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
// CalculateSwathAndDETConfiguration: for every active plane, chooses the luma
// and chroma swath heights, splits the DET buffer between luma and chroma, and
// reports whether the viewport is supported.
//
// Values written per plane k: SwathHeightY[k], SwathHeightC[k],
// DETBufferSizeY[k], DETBufferSizeC[k], ViewportSizeSupportPerPlane[k].
// *ViewportSizeSupport starts as true and is cleared when any plane fails.
// The swath width arrays are filled by the CalculateSwathWidth() call.
6721 static void CalculateSwathAndDETConfiguration(
6722 bool ForceSingleDPP,
6723 int NumberOfActivePlanes,
6724 unsigned int DETBufferSizeInKByte,
6725 double MaximumSwathWidthLuma[],
6726 double MaximumSwathWidthChroma[],
6727 enum scan_direction_class SourceScan[],
6728 enum source_format_class SourcePixelFormat[],
6729 enum dm_swizzle_mode SurfaceTiling[],
6730 int ViewportWidth[],
6731 int ViewportHeight[],
6732 int SurfaceWidthY[],
6733 int SurfaceWidthC[],
6734 int SurfaceHeightY[],
6735 int SurfaceHeightC[],
6736 int Read256BytesBlockHeightY[],
6737 int Read256BytesBlockHeightC[],
6738 int Read256BytesBlockWidthY[],
6739 int Read256BytesBlockWidthC[],
6740 enum odm_combine_mode ODMCombineEnabled[],
6741 int BlendingAndTiming[],
6744 double BytePerPixDETY[],
6745 double BytePerPixDETC[],
6748 double HRatioChroma[],
6750 int swath_width_luma_ub[],
6751 int swath_width_chroma_ub[],
6752 double SwathWidth[],
6753 double SwathWidthChroma[],
6756 unsigned int DETBufferSizeY[],
6757 unsigned int DETBufferSizeC[],
6758 bool ViewportSizeSupportPerPlane[],
6759 bool *ViewportSizeSupport)
6761 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6762 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6763 int MinimumSwathHeightY;
6764 int MinimumSwathHeightC;
6765 int RoundedUpMaxSwathSizeBytesY;
6766 int RoundedUpMaxSwathSizeBytesC;
6767 int RoundedUpMinSwathSizeBytesY;
6768 int RoundedUpMinSwathSizeBytesC;
6769 int RoundedUpSwathSizeBytesY;
6770 int RoundedUpSwathSizeBytesC;
6771 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6772 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
// Step 1: obtain swath widths, maximum swath heights and the rounded-up
// ("ub") swath widths for all planes.
6775 CalculateSwathWidth(
6777 NumberOfActivePlanes,
6789 Read256BytesBlockHeightY,
6790 Read256BytesBlockHeightC,
6791 Read256BytesBlockWidthY,
6792 Read256BytesBlockWidthC,
6797 SwathWidthSingleDPP,
6798 SwathWidthSingleDPPChroma,
6801 MaximumSwathHeightY,
6802 MaximumSwathHeightC,
6803 swath_width_luma_ub,
6804 swath_width_chroma_ub);
6806 *ViewportSizeSupport = true;
6807 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Step 2: minimum swath heights. For the formats listed in the first test
// the luma minimum is either the maximum or half of it, and the chroma
// minimum equals the chroma maximum.
// NOTE(review): dm_444_8 is not in the enclosing format list, so the
// dm_444_8 test below looks unreachable if it is nested in that branch -
// confirm.
6808 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6809 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6810 if (SurfaceTiling[k] == dm_sw_linear
6811 || (SourcePixelFormat[k] == dm_444_64
6812 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6813 && SourceScan[k] != dm_vert)) {
6814 MinimumSwathHeightY = MaximumSwathHeightY[k];
6815 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k];
6818 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6820 MinimumSwathHeightC = MaximumSwathHeightC[k];
// Remaining formats: linear tiling keeps both minimums at the maximum;
// the other cases halve luma, chroma, or both depending on the format and
// scan direction.
6822 if (SurfaceTiling[k] == dm_sw_linear) {
6823 MinimumSwathHeightY = MaximumSwathHeightY[k];
6824 MinimumSwathHeightC = MaximumSwathHeightC[k];
6825 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6826 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6827 MinimumSwathHeightC = MaximumSwathHeightC[k];
6828 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6829 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6830 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6831 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6832 MinimumSwathHeightY = MaximumSwathHeightY[k];
6833 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6835 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6836 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
// Step 3: swath sizes in bytes at maximum and minimum height
// (ub width * bytes per pixel in DET * height). For dm_420_10 the sizes
// are passed through dml_ceil(..., 256).
6840 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6841 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6842 if (SourcePixelFormat[k] == dm_420_10) {
6843 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6844 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6846 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6847 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6848 if (SourcePixelFormat[k] == dm_420_10) {
6849 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6850 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
// Step 4: pick the tallest swath combination whose luma + chroma size fits
// in DETBufferSizeInKByte * 1024 / 2 bytes. Order tried: max/max, then
// min luma (when luma is at least 1.5x chroma) or min chroma (when it is
// less), with the remaining assignments using min/min.
6853 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6854 SwathHeightY[k] = MaximumSwathHeightY[k];
6855 SwathHeightC[k] = MaximumSwathHeightC[k];
6856 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6857 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6858 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6859 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6860 SwathHeightY[k] = MinimumSwathHeightY;
6861 SwathHeightC[k] = MaximumSwathHeightC[k];
6862 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6863 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6864 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6865 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6866 SwathHeightY[k] = MaximumSwathHeightY[k];
6867 SwathHeightC[k] = MinimumSwathHeightC;
6868 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6869 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6871 SwathHeightY[k] = MinimumSwathHeightY;
6872 SwathHeightC[k] = MinimumSwathHeightC;
6873 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6874 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
// Step 5: split the DET. The size used here is dml_ceil(DETBufferSizeInKByte,
// 64) (presumably rounding up to a multiple of 64 - confirm against
// dml_ceil). No chroma swath: everything goes to luma. Luma swath at most
// 1.5x chroma: half each. Remaining assignments: luma gets two thirds
// (passed through dml_floor(..., 1024)) and chroma one third.
6877 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6879 if (SwathHeightC[k] == 0) {
6880 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6881 DETBufferSizeC[k] = 0;
6882 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6883 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6884 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6886 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6887 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
// Step 6: the plane is unsupported when even the minimum swaths exceed
// half of the DET, or a swath width exceeds its maximum.
// NOTE(review): this check uses actDETBufferSizeInKByte while the fit
// checks in step 4 use DETBufferSizeInKByte - confirm that is intended.
6890 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6891 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6892 *ViewportSizeSupport = false;
6893 ViewportSizeSupportPerPlane[k] = false;
6895 ViewportSizeSupportPerPlane[k] = true;
// CalculateSwathWidth: for every active plane, computes the luma/chroma swath
// widths (single-DPP and actual), the maximum swath heights, and the swath
// widths rounded up to 256-byte block granularity.
//
// Arrays written per plane k: SwathWidthSingleDPPY/C, SwathWidthY/C,
// MaximumSwathHeightY/C, swath_width_luma_ub, swath_width_chroma_ub.
6901 static void CalculateSwathWidth(
6902 bool ForceSingleDPP,
6903 int NumberOfActivePlanes,
6904 enum source_format_class SourcePixelFormat[],
6905 enum scan_direction_class SourceScan[],
6906 int ViewportWidth[],
6907 int ViewportHeight[],
6908 int SurfaceWidthY[],
6909 int SurfaceWidthC[],
6910 int SurfaceHeightY[],
6911 int SurfaceHeightC[],
6912 enum odm_combine_mode ODMCombineEnabled[],
6915 int Read256BytesBlockHeightY[],
6916 int Read256BytesBlockHeightC[],
6917 int Read256BytesBlockWidthY[],
6918 int Read256BytesBlockWidthC[],
6919 int BlendingAndTiming[],
6923 double SwathWidthSingleDPPY[],
6924 double SwathWidthSingleDPPC[],
6925 double SwathWidthY[],
6926 double SwathWidthC[],
6927 int MaximumSwathHeightY[],
6928 int MaximumSwathHeightC[],
6929 int swath_width_luma_ub[],
6930 int swath_width_chroma_ub[])
6932 enum odm_combine_mode MainPlaneODMCombine;
6935 #ifdef __DML_VBA_DEBUG__
6936 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6939 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Single-DPP luma swath width: the viewport width when the scan is not
// dm_vert; the other assignment uses the viewport height.
6940 if (SourceScan[k] != dm_vert) {
6941 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6943 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6946 #ifdef __DML_VBA_DEBUG__
6947 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6948 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Use the ODM combine mode of the plane that BlendingAndTiming[k] refers to,
// defaulting to this plane's own mode.
6951 MainPlaneODMCombine = ODMCombineEnabled[k];
6952 for (j = 0; j < NumberOfActivePlanes; ++j) {
6953 if (BlendingAndTiming[k] == j) {
6954 MainPlaneODMCombine = ODMCombineEnabled[j];
// Actual luma swath width: with 4:1 or 2:1 ODM combine it is capped at
// HActive / 4 (or / 2) scaled by HRatio; with two DPPs per plane it is half
// the single-DPP width; the last assignment keeps the single-DPP width.
6958 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6959 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6960 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6961 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6962 else if (DPPPerPlane[k] == 2)
6963 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6965 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6967 #ifdef __DML_VBA_DEBUG__
6968 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6969 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
// Chroma widths are half the luma widths for the 4:2:0 formats; the other
// pair of assignments copies the luma widths unchanged.
6972 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6973 SwathWidthC[k] = SwathWidthY[k] / 2;
6974 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6976 SwathWidthC[k] = SwathWidthY[k];
6977 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
// ForceSingleDPP overrides the actual widths with the single-DPP widths.
6980 if (ForceSingleDPP == true) {
6981 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6982 SwathWidthC[k] = SwathWidthSingleDPPC[k];
// Surface dimensions passed through dml_ceil with the 256-byte block
// dimensions (presumably rounding up to a block multiple - confirm against
// dml_ceil).
6985 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6986 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6988 #ifdef __DML_VBA_DEBUG__
6989 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
// Non-vertical scan: maximum swath height is the 256-byte block height, and
// the ub width is min(surface ub width, ceil(swath width - 1, block width)
// + block width). Chroma ub is 0 in the assignment that follows the
// BytePerPixC[k] > 0 case.
6992 if (SourceScan[k] != dm_vert) {
6993 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6994 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6995 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6996 if (BytePerPixC[k] > 0) {
6997 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6999 swath_width_chroma_ub[k] = dml_min(
7001 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7003 swath_width_chroma_ub[k] = 0;
// Same computation with the roles of block width and block height swapped
// and the surface height used as the bound (presumably the dm_vert branch).
7006 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7007 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7008 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7009 if (BytePerPixC[k] > 0) {
7010 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7012 swath_width_chroma_ub[k] = dml_min(
7013 surface_height_ub_c,
7014 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7016 swath_width_chroma_ub[k] = 0;
// CalculateExtraLatency: returns the extra latency as the sum of a cycle-based
// term and a byte-based term:
//   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
//   + ExtraLatencyBytes / ReturnBW
// where ExtraLatencyBytes comes from CalculateExtraLatencyBytes().
7023 static double CalculateExtraLatency(
7024 int RoundTripPingLatencyCycles,
7025 int ReorderingBytes,
7027 int TotalNumberOfActiveDPP,
7028 int PixelChunkSizeInKByte,
7029 int TotalNumberOfDCCActiveDPP,
7034 int NumberOfActivePlanes,
7036 int dpte_group_bytes[],
7037 double HostVMInefficiencyFactor,
7038 double HostVMMinPageSize,
7039 int HostVMMaxNonCachedPageTableLevels)
7041 double ExtraLatencyBytes;
7042 double ExtraLatency;
// Byte count that contributes to the latency.
7044 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7046 TotalNumberOfActiveDPP,
7047 PixelChunkSizeInKByte,
7048 TotalNumberOfDCCActiveDPP,
7052 NumberOfActivePlanes,
7055 HostVMInefficiencyFactor,
7057 HostVMMaxNonCachedPageTableLevels);
// Cycles are converted with DCFCLK, bytes with ReturnBW.
7059 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7061 #ifdef __DML_VBA_DEBUG__
7062 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7063 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7064 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7065 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7066 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7069 return ExtraLatency;
// CalculateExtraLatencyBytes: accumulates in `ret` the number of bytes that
// add to the extra latency: the reordering bytes, the pixel and meta chunks
// of the active DPPs, and (with GPUVM enabled) the dpte group bytes of every
// plane scaled by the host VM page-walk depth and inefficiency factor.
7072 static double CalculateExtraLatencyBytes(
7073 int ReorderingBytes,
7074 int TotalNumberOfActiveDPP,
7075 int PixelChunkSizeInKByte,
7076 int TotalNumberOfDCCActiveDPP,
7080 int NumberOfActivePlanes,
7082 int dpte_group_bytes[],
7083 double HostVMInefficiencyFactor,
7084 double HostVMMinPageSize,
7085 int HostVMMaxNonCachedPageTableLevels)
7088 int HostVMDynamicLevels = 0, k;
// With both GPUVM and host VM enabled, the number of dynamic levels is the
// non-cached page table level count for a min page size below 2048, one less
// for a size in [2048, 1048576), and two less in the following assignment
// (the reduced values are clamped at 0). The final assignment sets 0.
7090 if (GPUVMEnable == true && HostVMEnable == true) {
7091 if (HostVMMinPageSize < 2048)
7092 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
7093 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
7094 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
7096 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
7098 HostVMDynamicLevels = 0;
// Chunk sizes are multiplied by 1024.0 before being added to the reordering
// bytes.
7101 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
// Each plane adds its DPP count times its dpte group bytes, multiplied by
// (1 + 8 * HostVMDynamicLevels) and by the host VM inefficiency factor.
7103 if (GPUVMEnable == true) {
7104 for (k = 0; k < NumberOfActivePlanes; ++k)
7105 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
// CalculateUrgentLatency: computes in `ret` the largest of the three urgent
// latency inputs and, when DoUrgentLatencyAdjustment is set, adds a fabric
// clock dependent adjustment to it.
7110 static double CalculateUrgentLatency(
7111 double UrgentLatencyPixelDataOnly,
7112 double UrgentLatencyPixelMixedWithVMData,
7113 double UrgentLatencyVMDataOnly,
7114 bool DoUrgentLatencyAdjustment,
7115 double UrgentLatencyAdjustmentFabricClockComponent,
7116 double UrgentLatencyAdjustmentFabricClockReference,
// Worst case of the three latency inputs.
7121 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
// Adjustment = component * (reference / FabricClock - 1); it is zero when
// FabricClock equals the reference and positive when FabricClock is lower
// (for a positive component).
7122 if (DoUrgentLatencyAdjustment == true)
7123 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - compute v->DCFCLKState[i][j] for every state index i
 * below v->soc.num_states and for j = 0 and j = 1.
 *
 * For each (i, j) the function derives two DCFCLK requirements, one from
 * average bandwidth and one from peak (prefetch) bandwidth, and stores
 *   min(v->DCFCLKPerState[i], 1.05 * max(average, peak))
 * into v->DCFCLKState[i][j].
 *
 * mode_lib:        supplies the vba state (v) that is read and written.
 * MaxPrefetchMode: forwarded to CalculateTWait().
 * ReorderingBytes: not referenced on any visible line; presumably forwarded
 *                  to CalculateExtraLatencyBytes() - confirm.
 *
 * NOTE(review): this listing is missing several short lines (braces, else
 * keywords, some call arguments and operands). Comments below that depend on
 * such lines are marked as assumptions.
 */
7127 static noinline_for_stack void UseMinimumDCFCLK(
7128 struct display_mode_lib *mode_lib,
7129 int MaxPrefetchMode,
7130 int ReorderingBytes)
7132 struct vba_vars_st *v = &mode_lib->vba;
7133 int dummy1, i, j, k;
7134 double NormalEfficiency, dummy2, dummy3;
7135 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
// Convert the percentage stored in v into a fraction; it is used as a divisor throughout.
7137 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7138 for (i = 0; i < v->soc.num_states; ++i) {
7139 for (j = 0; j <= 1; ++j) {
7140 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7141 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7142 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7143 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7144 double MinimumTWait;
7145 double NonDPTEBandwidth;
7146 double DPTEBandwidth;
7147 double DCFCLKRequiredForAverageBandwidth;
7148 double ExtraLatencyBytes;
7149 double ExtraLatencyCycles;
7150 double DCFCLKRequiredForPeakBandwidth;
7151 int NoOfDPPState[DC__NUM_DPP__MAX];
7152 double MinimumTvmPlus2Tr0;
// Sum over all active planes: DPP count * DPTE bytes per row, divided by 15.75 line times (HTotal / PixelClock).
7154 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7155 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7156 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7157 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
// Local per-plane copy of the DPP count for this (i, j).
7160 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7161 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7163 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7164 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
// Use the prefetch/flip DPTE figure when HostVM is on or plane 0 requires immediate flip; otherwise the regular DPTE row bandwidth.
7165 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7166 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
// Average requirement: largest of the projected deep-sleep DCFCLK and two bandwidth-derived clocks.
7167 DCFCLKRequiredForAverageBandwidth = dml_max3(
7168 v->ProjectedDCFCLKDeepSleep[i][j],
7169 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7170 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7171 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
// NOTE(review): some arguments of this call are not visible in this listing - confirm the full argument list against the callee.
7173 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7175 v->TotalNumberOfActiveDPP[i][j],
7176 v->PixelChunkSizeInKByte,
7177 v->TotalNumberOfDCCActiveDPP[i][j],
7181 v->NumberOfActivePlanes,
7183 v->dpte_group_bytes,
7185 v->HostVMMinPageSize,
7186 v->HostVMMaxNonCachedPageTableLevels);
// Fixed round-trip and arbitration delays plus the extra latency bytes converted to cycles.
7187 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
// Per-plane peak requirement during prefetch.
7188 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7189 double DCFCLKCyclesRequiredInPrefetch;
7190 double ExpectedPrefetchBWAcceleration;
7191 double PrefetchTime;
// Cycles needed to return the luma and chroma prefetch pixel data.
7193 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7194 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
// Total prefetch cycles: extra latency share, PDE/meta-PTE bytes (counted only when GPUVMMaxPageTableLevels > 2), DPTE rows, meta rows and pixel data.
7195 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7196 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7197 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7198 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7199 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7200 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7201 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
// Non-zero only when GPUVM, this plane's dynamic metadata and dynamic metadata VM are all enabled.
7202 DynamicMetadataVMExtraLatency[k] =
7203 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7204 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
// NOTE(review): the operand multiplied by the page-table-level factor below is on a line not visible in this listing - confirm.
7205 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7207 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7208 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7209 - DynamicMetadataVMExtraLatency[k];
// With a positive prefetch window, scale the pixel rate by the expected prefetch ratio and bandwidth acceleration.
7211 if (PrefetchTime > 0) {
7212 double ExpectedVRatioPrefetch;
7214 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7215 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7216 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7217 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
// HostVM or immediate flip adds the DPTE bandwidth on top of the per-plane requirement.
7218 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7219 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7220 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
// NOTE(review): presumably the else branch of the PrefetchTime check (the else line is not visible): fall back to the state's full DCFCLK.
7223 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
// Planes with dynamic metadata also need the extra latency to fit in the time left after setup and metadata pipe delays.
7225 if (v->DynamicMetadataEnable[k] == true) {
7230 double AllowedTimeForUrgentExtraLatency;
// NOTE(review): several arguments of this call, and the declarations of TSetupPipe/TdmbfPipe/TdmecPipe/TdmsksPipe, are not visible in this listing.
7232 CalculateVupdateAndDynamicMetadataParameters(
7233 v->MaxInterDCNTileRepeaters,
7234 v->RequiredDPPCLK[i][j][k],
7235 v->RequiredDISPCLK[i][j],
7236 v->ProjectedDCFCLKDeepSleep[i][j],
7239 v->VTotal[k] - v->VActive[k],
7240 v->DynamicMetadataTransmittedBytes[k],
7241 v->DynamicMetadataLinesBeforeActiveRequired[k],
7243 v->ProgressiveToInterlaceUnitInOPP,
7251 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7252 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7253 if (AllowedTimeForUrgentExtraLatency > 0) {
7254 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7255 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7256 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
// NOTE(review): presumably the else branch of the AllowedTimeForUrgentExtraLatency check - confirm.
7258 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
// Peak requirement for this (i, j) starts as the sum of the per-plane requirements.
7262 DCFCLKRequiredForPeakBandwidth = 0;
7263 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7264 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
// NOTE(review): the value used when GPUVMEnable is false is on a line not visible in this listing.
7266 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7267 * (v->GPUVMEnable == true ?
7268 (v->HostVMEnable == true ?
7269 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
// Per-plane check of the time available against the minimum required time.
7271 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7272 double MaximumTvmPlus2Tr0PlusTsw;
7274 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
// Window too small for the minimum plus a quarter of the prefetch line time: use the state's full DCFCLK.
7275 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7276 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
// NOTE(review): presumably the else branch; the divisors below are positive only when the check above is false.
7278 DCFCLKRequiredForPeakBandwidth = dml_max3(
7279 DCFCLKRequiredForPeakBandwidth,
7280 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7281 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
// Result: 1.05 times the larger requirement, capped at the state's DCFCLK.
7284 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize - decide whether unbounded
 * requesting is enabled and derive the compressed buffer size.
 *
 * Outputs:
 *   *UnboundedRequestEnabled     - result of UnboundedRequest() for the given
 *                                  policy, DPP count, chroma flag and Output[0]
 *                                  (only the first element of Output is read).
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus the DET
 *                                  space (TotalActiveDPP pipes when unbounded
 *                                  requesting is enabled, MaxNumDPP pipes
 *                                  otherwise), then multiplied by
 *                                  CompressedBufferSegmentSizeInkByteFinal and
 *                                  divided by 64.
 *
 * NOTE(review): the declarations of TotalActiveDPP and MaxNumDPP are not
 * visible in this listing; presumably they are parameters - confirm.
 */
7289 static void CalculateUnboundedRequestAndCompressedBufferSize(
7290 unsigned int DETBufferSizeInKByte,
7291 int ConfigReturnBufferSizeInKByte,
7292 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7294 bool NoChromaPlanes,
7296 int CompressedBufferSegmentSizeInkByteFinal,
7297 enum output_encoder_class *Output,
7298 bool *UnboundedRequestEnabled,
7299 int *CompressedBufferSizeInkByte)
// NOTE(review): presumably dml_ceil rounds the DET size up to a multiple of 64 - confirm against its definition.
7301 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7303 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The subtraction is evaluated in double (actDETBufferSizeInKByte is a double) and converted to int on assignment.
7304 *CompressedBufferSizeInkByte = (
7305 *UnboundedRequestEnabled == true ?
7306 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7307 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Integer arithmetic: multiply first, then divide by 64.
7308 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only dump of inputs and results; the matching #endif is not visible in this listing.
7310 #ifdef __DML_VBA_DEBUG__
7311 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7312 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7313 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7314 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7315 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7316 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7317 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
/*
 * UnboundedRequest - evaluate the unbounded requesting policy.
 *
 * ret_val is set to true only when the policy is not
 * dm_unbounded_requesting_disable, exactly one DPP is active and NoChroma is
 * true. A further check singles out the dm_unbounded_requesting_edp_only
 * policy with an Output other than dm_edp.
 *
 * NOTE(review): the statement guarded by that last check and the return
 * statement are not visible in this listing. Presumably ret_val is cleared in
 * that case and then returned - confirm.
 */
7321 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7323 bool ret_val = false;
// Base condition: policy not disabled, a single active DPP, and no chroma.
7325 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
// eDP-only policy with a non-eDP output.
7326 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7331 static unsigned int CalculateMaxVStartup(
7332 unsigned int VTotal,
7333 unsigned int VActive,
7334 unsigned int VBlankNom,
7335 unsigned int HTotal,
7337 bool ProgressiveTointerlaceUnitinOPP,
7339 unsigned int VBlankNomDefaultUS,
7340 double WritebackDelayTime)
7342 unsigned int MaxVStartup = 0;
7343 unsigned int vblank_size = 0;
7344 double line_time_us = HTotal / PixelClock;
7345 unsigned int vblank_actual = VTotal - VActive;
7346 unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
7347 unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line);
7348 unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input;
7350 vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
7351 if (Interlace && !ProgressiveTointerlaceUnitinOPP)
7352 MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
7354 MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));
7355 if (MaxVStartup > 1023)