2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
/* File-local constants and compile-time switches. */
42 #define BPP_BLENDED_PIPE 0xffffffff
/*
 * NOTE(review): 5184 and 4096 are the same figures quoted as slice-width
 * limits in the dscceComputeDelay() comments -- presumably the same limits;
 * confirm.
 */
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
49 // Move these to IP parameters/constants
51 // The vstartup value at which DML starts trying to determine whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
/*
 * Fields read through the myPipe pointer in CalculatePrefetchSchedule()
 * (DCFCLKDeepSleep, DPPPerPlane, InterlaceEnable, NumberOfCursors, DCCEnable,
 * ODMCombineIsEnabled, SourcePixelFormat and ProgressiveToInterlaceUnitInOPP
 * are all accessed there).
 * NOTE(review): the enclosing struct's opening and closing lines are not
 * shown next to these fields -- confirm the full declaration.
 */
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
67 enum scan_direction_class SourceScan;
68 unsigned int BlockWidth256BytesY;
69 unsigned int BlockHeight256BytesY;
70 unsigned int BlockWidth256BytesC;
71 unsigned int BlockHeight256BytesC;
72 unsigned int InterlaceEnable;
73 unsigned int NumberOfCursors;
76 unsigned int DCCEnable;
77 bool ODMCombineIsEnabled;
78 enum source_format_class SourcePixelFormat;
81 bool ProgressiveToInterlaceUnitInOPP;
/*
 * NOTE(review): repeats the BPP_BLENDED_PIPE definition already given with
 * the other macros at the top of the file. The replacement text is identical,
 * so the redefinition is accepted by the preprocessor, but it is redundant.
 */
85 #define BPP_BLENDED_PIPE 0xffffffff
/*
 * Forward declaration. Returns bool. The pixel format and tiling mode are
 * passed by value; the byte-per-pixel values and the 256-byte block
 * width/height values (luma Y and chroma C) are passed by pointer,
 * presumably as outputs -- confirm against the definition.
 */
87 static bool CalculateBytePerPixelAnd256BBlockSizes(
88 enum source_format_class SourcePixelFormat,
89 enum dm_swizzle_mode SurfaceTiling,
90 unsigned int *BytePerPixelY,
91 unsigned int *BytePerPixelC,
92 double *BytePerPixelDETY,
93 double *BytePerPixelDETC,
94 unsigned int *BlockHeight256BytesY,
95 unsigned int *BlockHeight256BytesC,
96 unsigned int *BlockWidth256BytesY,
97 unsigned int *BlockWidth256BytesC);
/* Forward declaration; called from dml31_recalculate() with the mode_lib it was given. */
98 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
/* Forward declaration; this is the last step invoked by dml31_recalculate(). */
99 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
/*
 * Forward declaration of the DSC encoder delay helper defined further down
 * in this file. Returns unsigned int.
 */
100 static unsigned int dscceComputeDelay(
103 unsigned int sliceWidth,
104 unsigned int numSlices,
105 enum output_format_class pixelFormat,
106 enum output_encoder_class Output);
/* Forward declaration of the fixed DSC delay helper defined further down in this file. */
107 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
/*
 * Forward declaration; the definition follows further down in this file.
 * Returns bool. By-value inputs come first; the pointer parameters start at
 * DSTXAfterScaler. The definition writes through at least DSTXAfterScaler,
 * DSTYAfterScaler and NotEnoughTimeForDynamicMetadata; the remaining pointer
 * parameters are presumably outputs as well -- confirm against the
 * definition.
 */
108 static bool CalculatePrefetchSchedule(
109 struct display_mode_lib *mode_lib,
110 double HostVMInefficiencyFactor,
112 unsigned int DSCDelay,
113 double DPPCLKDelaySubtotalPlusCNVCFormater,
114 double DPPCLKDelaySCL,
115 double DPPCLKDelaySCLLBOnly,
116 double DPPCLKDelayCNVCCursor,
117 double DISPCLKDelaySubtotal,
118 unsigned int DPP_RECOUT_WIDTH,
119 enum output_format_class OutputFormat,
120 unsigned int MaxInterDCNTileRepeaters,
121 unsigned int VStartup,
122 unsigned int MaxVStartup,
123 unsigned int GPUVMPageTableLevels,
126 unsigned int HostVMMaxNonCachedPageTableLevels,
127 double HostVMMinPageSize,
128 bool DynamicMetadataEnable,
129 bool DynamicMetadataVMEnabled,
130 int DynamicMetadataLinesBeforeActiveRequired,
131 unsigned int DynamicMetadataTransmittedBytes,
132 double UrgentLatency,
133 double UrgentExtraLatency,
135 unsigned int PDEAndMetaPTEBytesFrame,
136 unsigned int MetaRowByte,
137 unsigned int PixelPTEBytesPerRow,
138 double PrefetchSourceLinesY,
139 unsigned int SwathWidthY,
140 double VInitPreFillY,
141 unsigned int MaxNumSwathY,
142 double PrefetchSourceLinesC,
143 unsigned int SwathWidthC,
144 double VInitPreFillC,
145 unsigned int MaxNumSwathC,
146 int swath_width_luma_ub,
147 int swath_width_chroma_ub,
148 unsigned int SwathHeightY,
149 unsigned int SwathHeightC,
151 double *DSTXAfterScaler,
152 double *DSTYAfterScaler,
153 double *DestinationLinesForPrefetch,
154 double *PrefetchBandwidth,
155 double *DestinationLinesToRequestVMInVBlank,
156 double *DestinationLinesToRequestRowInVBlank,
157 double *VRatioPrefetchY,
158 double *VRatioPrefetchC,
159 double *RequiredPrefetchPixDataBWLuma,
160 double *RequiredPrefetchPixDataBWChroma,
161 bool *NotEnoughTimeForDynamicMetadata,
163 double *prefetch_vmrow_bw,
167 int *VUpdateOffsetPix,
168 double *VUpdateWidthPix,
169 double *VReadyOffsetPix);
/*
 * Forward declaration. Takes a clock and a VCO speed by value and returns a
 * double; going by the name it rounds the clock up to a DFS step --
 * confirm against the definition.
 */
170 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
/*
 * Forward declaration. Same signature as RoundToDFSGranularityUp();
 * presumably the round-down counterpart -- confirm against the definition.
 */
171 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/*
 * Forward declaration. Returns void; the six trailing unsigned int pointers
 * (max uncompressed / max compressed / independent block, each for luma and
 * chroma) are presumably the outputs -- confirm against the definition.
 */
172 static void CalculateDCCConfiguration(
174 bool DCCProgrammingAssumesScanDirectionUnknown,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceWidthLuma,
177 unsigned int SurfaceWidthChroma,
178 unsigned int SurfaceHeightLuma,
179 unsigned int SurfaceHeightChroma,
180 double DETBufferSize,
181 unsigned int RequestHeight256ByteLuma,
182 unsigned int RequestHeight256ByteChroma,
183 enum dm_swizzle_mode TilingFormat,
184 unsigned int BytePerPixelY,
185 unsigned int BytePerPixelC,
186 double BytePerPixelDETY,
187 double BytePerPixelDETC,
188 enum scan_direction_class ScanOrientation,
189 unsigned int *MaxUncompressedBlockLuma,
190 unsigned int *MaxUncompressedBlockChroma,
191 unsigned int *MaxCompressedBlockLuma,
192 unsigned int *MaxCompressedBlockChroma,
193 unsigned int *IndependentBlockLuma,
194 unsigned int *IndependentBlockChroma);
/*
 * Forward declaration. Returns a double and additionally takes VInitPreFill
 * and MaxNumSwath by pointer (presumably secondary outputs -- confirm
 * against the definition).
 */
195 static double CalculatePrefetchSourceLines(
196 struct display_mode_lib *mode_lib,
200 bool ProgressiveToInterlaceUnitInOPP,
201 unsigned int SwathHeight,
202 unsigned int ViewportYStart,
203 double *VInitPreFill,
204 unsigned int *MaxNumSwath);
/*
 * Forward declaration. Returns unsigned int. Surface/tiling/page-size inputs
 * are passed by value; the parameters from MacroTileWidth onward are
 * pointers, presumably outputs -- confirm against the definition.
 * Note that SurfaceTiling is declared as unsigned int here, whereas other
 * declarations in this file use enum dm_swizzle_mode for tiling arguments.
 */
205 static unsigned int CalculateVMAndRowBytes(
206 struct display_mode_lib *mode_lib,
208 unsigned int BlockHeight256Bytes,
209 unsigned int BlockWidth256Bytes,
210 enum source_format_class SourcePixelFormat,
211 unsigned int SurfaceTiling,
212 unsigned int BytePerPixel,
213 enum scan_direction_class ScanDirection,
214 unsigned int SwathWidth,
215 unsigned int ViewportHeight,
218 unsigned int HostVMMaxNonCachedPageTableLevels,
219 unsigned int GPUVMMinPageSize,
220 unsigned int HostVMMinPageSize,
221 unsigned int PTEBufferSizeInRequests,
223 unsigned int DCCMetaPitch,
224 unsigned int *MacroTileWidth,
225 unsigned int *MetaRowByte,
226 unsigned int *PixelPTEBytesPerRow,
227 bool *PTEBufferSizeNotExceeded,
228 int *dpte_row_width_ub,
229 unsigned int *dpte_row_height,
230 unsigned int *MetaRequestWidth,
231 unsigned int *MetaRequestHeight,
232 unsigned int *meta_row_width,
233 unsigned int *meta_row_height,
235 unsigned int *dpte_group_bytes,
236 unsigned int *PixelPTEReqWidth,
237 unsigned int *PixelPTEReqHeight,
238 unsigned int *PTERequestSize,
239 int *DPDE0BytesFrame,
240 int *MetaPTEBytesFrame);
/* Forward declaration. Takes the prefetch mode and three latency/time values by value and returns a double. */
241 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
/*
 * Forward declaration. Returns void; takes per-row byte counts and row
 * heights for luma and chroma by value, and dpte_row_bw by pointer
 * (presumably an output -- confirm against the definition).
 */
242 static void CalculateRowBandwidth(
244 enum source_format_class SourcePixelFormat,
249 unsigned int MetaRowByteLuma,
250 unsigned int MetaRowByteChroma,
251 unsigned int meta_row_height_luma,
252 unsigned int meta_row_height_chroma,
253 unsigned int PixelPTEBytesPerRowLuma,
254 unsigned int PixelPTEBytesPerRowChroma,
255 unsigned int dpte_row_height_luma,
256 unsigned int dpte_row_height_chroma,
258 double *dpte_row_bw);
/*
 * Forward declaration. Returns void; the four trailing pointers
 * (DestinationLinesToRequestVMInImmediateFlip,
 * DestinationLinesToRequestRowInImmediateFlip, final_flip_bw,
 * ImmediateFlipSupportedForPipe) are presumably the outputs -- confirm
 * against the definition.
 */
260 static void CalculateFlipSchedule(
261 struct display_mode_lib *mode_lib,
262 double HostVMInefficiencyFactor,
263 double UrgentExtraLatency,
264 double UrgentLatency,
265 unsigned int GPUVMMaxPageTableLevels,
267 unsigned int HostVMMaxNonCachedPageTableLevels,
269 double HostVMMinPageSize,
270 double PDEAndMetaPTEBytesPerFrame,
272 double DPTEBytesPerRow,
273 double BandwidthAvailableForImmediateFlip,
274 unsigned int TotImmediateFlipBytes,
275 enum source_format_class SourcePixelFormat,
281 unsigned int dpte_row_height,
282 unsigned int meta_row_height,
283 unsigned int dpte_row_height_chroma,
284 unsigned int meta_row_height_chroma,
285 double *DestinationLinesToRequestVMInImmediateFlip,
286 double *DestinationLinesToRequestRowInImmediateFlip,
287 double *final_flip_bw,
288 bool *ImmediateFlipSupportedForPipe);
/* Forward declaration. All writeback parameters are passed by value; the result is returned as a double. */
289 static double CalculateWriteBackDelay(
290 enum source_format_class WritebackPixelFormat,
291 double WritebackHRatio,
292 double WritebackVRatio,
293 unsigned int WritebackVTaps,
294 int WritebackDestinationWidth,
295 int WritebackDestinationHeight,
296 int WritebackSourceHeight,
297 unsigned int HTotal);
/*
 * Forward declaration; called from CalculatePrefetchSchedule(). Returns
 * void; VUpdateOffsetPix, VUpdateWidthPix and VReadyOffsetPix are passed by
 * pointer, presumably as outputs -- confirm against the definition.
 */
299 static void CalculateVupdateAndDynamicMetadataParameters(
300 int MaxInterDCNTileRepeaters,
303 double DCFClkDeepSleep,
307 int DynamicMetadataTransmittedBytes,
308 int DynamicMetadataLinesBeforeActiveRequired,
310 bool ProgressiveToInterlaceUnitInOPP,
315 int *VUpdateOffsetPix,
316 double *VUpdateWidthPix,
317 double *VReadyOffsetPix);
/*
 * Forward declaration. Returns void. Scalar configuration values are passed
 * by value and plane data as arrays (presumably indexed by plane up to
 * NumberOfActivePlanes -- confirm). The pointer parameters from
 * DRAMClockChangeSupport onward carry the watermark values and are
 * presumably outputs -- confirm against the definition.
 */
319 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
320 struct display_mode_lib *mode_lib,
321 unsigned int PrefetchMode,
322 unsigned int NumberOfActivePlanes,
323 unsigned int MaxLineBufferLines,
324 unsigned int LineBufferSize,
325 unsigned int WritebackInterfaceBufferSize,
328 bool SynchronizedVBlank,
329 unsigned int dpte_group_bytes[],
330 unsigned int MetaChunkSize,
331 double UrgentLatency,
333 double WritebackLatency,
334 double WritebackChunkSize,
336 double DRAMClockChangeLatency,
338 double SREnterPlusExitTime,
340 double SREnterPlusExitZ8Time,
341 double DCFCLKDeepSleep,
342 unsigned int DETBufferSizeY[],
343 unsigned int DETBufferSizeC[],
344 unsigned int SwathHeightY[],
345 unsigned int SwathHeightC[],
346 unsigned int LBBitPerPixel[],
347 double SwathWidthY[],
348 double SwathWidthC[],
350 double HRatioChroma[],
351 unsigned int vtaps[],
352 unsigned int VTAPsChroma[],
354 double VRatioChroma[],
355 unsigned int HTotal[],
357 unsigned int BlendingAndTiming[],
358 unsigned int DPPPerPlane[],
359 double BytePerPixelDETY[],
360 double BytePerPixelDETC[],
361 double DSTXAfterScaler[],
362 double DSTYAfterScaler[],
363 bool WritebackEnable[],
364 enum source_format_class WritebackPixelFormat[],
365 double WritebackDestinationWidth[],
366 double WritebackDestinationHeight[],
367 double WritebackSourceHeight[],
368 bool UnboundedRequestEnabled,
369 int unsigned CompressedBufferSizeInkByte,
370 enum clock_change_support *DRAMClockChangeSupport,
371 double *UrgentWatermark,
372 double *WritebackUrgentWatermark,
373 double *DRAMClockChangeWatermark,
374 double *WritebackDRAMClockChangeWatermark,
375 double *StutterExitWatermark,
376 double *StutterEnterPlusExitWatermark,
377 double *Z8StutterExitWatermark,
378 double *Z8StutterEnterPlusExitWatermark,
379 double *MinActiveDRAMClockChangeLatencySupported);
/*
 * Forward declaration. Returns void; takes plane data as arrays and
 * DCFCLKDeepSleep by pointer (presumably the single output -- confirm
 * against the definition).
 */
381 static void CalculateDCFCLKDeepSleep(
382 struct display_mode_lib *mode_lib,
383 unsigned int NumberOfActivePlanes,
387 double VRatioChroma[],
388 double SwathWidthY[],
389 double SwathWidthC[],
390 unsigned int DPPPerPlane[],
392 double HRatioChroma[],
394 double PSCL_THROUGHPUT[],
395 double PSCL_THROUGHPUT_CHROMA[],
397 double ReadBandwidthLuma[],
398 double ReadBandwidthChroma[],
400 double *DCFCLKDeepSleep);
/*
 * Forward declaration. Returns void; the three UrgentBurstFactor* values
 * (cursor, luma, chroma) and NotEnoughUrgentLatencyHiding are passed by
 * pointer, presumably as outputs -- confirm against the definition.
 */
402 static void CalculateUrgentBurstFactor(
403 int swath_width_luma_ub,
404 int swath_width_chroma_ub,
405 unsigned int SwathHeightY,
406 unsigned int SwathHeightC,
408 double UrgentLatency,
409 double CursorBufferSize,
410 unsigned int CursorWidth,
411 unsigned int CursorBPP,
414 double BytePerPixelInDETY,
415 double BytePerPixelInDETC,
416 double DETBufferSizeY,
417 double DETBufferSizeC,
418 double *UrgentBurstFactorCursor,
419 double *UrgentBurstFactorLuma,
420 double *UrgentBurstFactorChroma,
421 bool *NotEnoughUrgentLatencyHiding);
/*
 * Forward declaration. Returns void. Many arguments are multi-dimensional
 * arrays whose second dimension is 2 and, where present, whose third
 * dimension is DC__NUM_DPP__MAX; the meaning of the leading (unsized)
 * dimension is not visible here -- confirm against the definition.
 * DCFCLKState is the last parameter and is presumably where results are
 * written -- confirm.
 */
423 static void UseMinimumDCFCLK(
424 struct display_mode_lib *mode_lib,
425 int MaxInterDCNTileRepeaters,
427 double FinalDRAMClockChangeLatency,
428 double SREnterPlusExitTime,
430 int RoundTripPingLatencyCycles,
432 int PixelChunkSizeInKByte,
435 int GPUVMMaxPageTableLevels,
437 int NumberOfActivePlanes,
438 double HostVMMinPageSize,
439 int HostVMMaxNonCachedPageTableLevels,
440 bool DynamicMetadataVMEnabled,
441 enum immediate_flip_requirement ImmediateFlipRequirement,
442 bool ProgressiveToInterlaceUnitInOPP,
443 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
444 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
447 int DynamicMetadataTransmittedBytes[],
448 int DynamicMetadataLinesBeforeActiveRequired[],
450 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
451 double RequiredDISPCLK[][2],
453 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
454 double ProjectedDCFCLKDeepSleep[][2],
455 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
456 double TotalVActivePixelBandwidth[][2],
457 double TotalVActiveCursorBandwidth[][2],
458 double TotalMetaRowBandwidth[][2],
459 double TotalDPTERowBandwidth[][2],
460 unsigned int TotalNumberOfActiveDPP[][2],
461 unsigned int TotalNumberOfDCCActiveDPP[][2],
462 int dpte_group_bytes[],
463 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
464 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
465 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
466 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
471 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
472 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
473 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
474 bool DynamicMetadataEnable[],
475 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
476 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
477 double ReadBandwidthLuma[],
478 double ReadBandwidthChroma[],
479 double DCFCLKPerState[],
480 double DCFCLKState[][2]);
/*
 * Forward declaration. Returns void; every argument after
 * NumberOfActivePlanes is an array. The trailing DisplayPipe*DeliveryTime*
 * and CursorRequestDeliveryTime* arrays are presumably filled in by the
 * function -- confirm against the definition. The cursor width/BPP arrays
 * have a second dimension of DC__NUM_CURSOR__MAX.
 */
482 static void CalculatePixelDeliveryTimes(
483 unsigned int NumberOfActivePlanes,
485 double VRatioChroma[],
486 double VRatioPrefetchY[],
487 double VRatioPrefetchC[],
488 unsigned int swath_width_luma_ub[],
489 unsigned int swath_width_chroma_ub[],
490 unsigned int DPPPerPlane[],
492 double HRatioChroma[],
494 double PSCL_THROUGHPUT[],
495 double PSCL_THROUGHPUT_CHROMA[],
498 enum scan_direction_class SourceScan[],
499 unsigned int NumberOfCursors[],
500 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
501 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
502 unsigned int BlockWidth256BytesY[],
503 unsigned int BlockHeight256BytesY[],
504 unsigned int BlockWidth256BytesC[],
505 unsigned int BlockHeight256BytesC[],
506 double DisplayPipeLineDeliveryTimeLuma[],
507 double DisplayPipeLineDeliveryTimeChroma[],
508 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
509 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
510 double DisplayPipeRequestDeliveryTimeLuma[],
511 double DisplayPipeRequestDeliveryTimeChroma[],
512 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
513 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
514 double CursorRequestDeliveryTime[],
515 double CursorRequestDeliveryTimePrefetch[]);
/*
 * Forward declaration. Returns void. Row, request and group geometry is
 * passed as int arrays; the trailing double arrays (DST_Y_PER_*_ROW_NOM_*,
 * TimePer*MetaChunk*, time_per_pte_group_*) are presumably the results --
 * confirm against the definition.
 */
517 static void CalculateMetaAndPTETimes(
518 int NumberOfActivePlanes,
521 int MinMetaChunkSizeBytes,
524 double VRatioChroma[],
525 double DestinationLinesToRequestRowInVBlank[],
526 double DestinationLinesToRequestRowInImmediateFlip[],
531 enum scan_direction_class SourceScan[],
532 int dpte_row_height[],
533 int dpte_row_height_chroma[],
534 int meta_row_width[],
535 int meta_row_width_chroma[],
536 int meta_row_height[],
537 int meta_row_height_chroma[],
538 int meta_req_width[],
539 int meta_req_width_chroma[],
540 int meta_req_height[],
541 int meta_req_height_chroma[],
542 int dpte_group_bytes[],
543 int PTERequestSizeY[],
544 int PTERequestSizeC[],
545 int PixelPTEReqWidthY[],
546 int PixelPTEReqHeightY[],
547 int PixelPTEReqWidthC[],
548 int PixelPTEReqHeightC[],
549 int dpte_row_width_luma_ub[],
550 int dpte_row_width_chroma_ub[],
551 double DST_Y_PER_PTE_ROW_NOM_L[],
552 double DST_Y_PER_PTE_ROW_NOM_C[],
553 double DST_Y_PER_META_ROW_NOM_L[],
554 double DST_Y_PER_META_ROW_NOM_C[],
555 double TimePerMetaChunkNominal[],
556 double TimePerChromaMetaChunkNominal[],
557 double TimePerMetaChunkVBlank[],
558 double TimePerChromaMetaChunkVBlank[],
559 double TimePerMetaChunkFlip[],
560 double TimePerChromaMetaChunkFlip[],
561 double time_per_pte_group_nom_luma[],
562 double time_per_pte_group_vblank_luma[],
563 double time_per_pte_group_flip_luma[],
564 double time_per_pte_group_nom_chroma[],
565 double time_per_pte_group_vblank_chroma[],
566 double time_per_pte_group_flip_chroma[]);
/*
 * Forward declaration. Returns void; the four trailing double arrays
 * (TimePerVMGroupVBlank, TimePerVMGroupFlip, TimePerVMRequestVBlank,
 * TimePerVMRequestFlip) are presumably the results -- confirm against the
 * definition. Note the mixed signedness: the dpde0_* arrays are unsigned int
 * while the meta_pte_* arrays are int.
 */
568 static void CalculateVMGroupAndRequestTimes(
569 unsigned int NumberOfActivePlanes,
571 unsigned int GPUVMMaxPageTableLevels,
572 unsigned int HTotal[],
574 double DestinationLinesToRequestVMInVBlank[],
575 double DestinationLinesToRequestVMInImmediateFlip[],
578 int dpte_row_width_luma_ub[],
579 int dpte_row_width_chroma_ub[],
580 int vm_group_bytes[],
581 unsigned int dpde0_bytes_per_frame_ub_l[],
582 unsigned int dpde0_bytes_per_frame_ub_c[],
583 int meta_pte_bytes_per_frame_ub_l[],
584 int meta_pte_bytes_per_frame_ub_c[],
585 double TimePerVMGroupVBlank[],
586 double TimePerVMGroupFlip[],
587 double TimePerVMRequestVBlank[],
588 double TimePerVMRequestFlip[]);
/*
 * Forward declaration. Returns void; the seven trailing pointers (stutter
 * efficiency with and without VBlank, burst counts per frame, their Z8
 * variants, and StutterPeriod) are presumably the outputs -- confirm
 * against the definition.
 */
590 static void CalculateStutterEfficiency(
591 struct display_mode_lib *mode_lib,
592 int CompressedBufferSizeInkByte,
593 bool UnboundedRequestEnabled,
594 int ConfigReturnBufferSizeInKByte,
595 int MetaFIFOSizeInKEntries,
596 int ZeroSizeBufferEntries,
597 int NumberOfActivePlanes,
598 int ROBBufferSizeInKByte,
599 double TotalDataReadBandwidth,
602 double COMPBUF_RESERVED_SPACE_64B,
603 double COMPBUF_RESERVED_SPACE_ZS,
606 bool SynchronizedVBlank,
607 double Z8StutterEnterPlusExitWatermark,
608 double StutterEnterPlusExitWatermark,
609 bool ProgressiveToInterlaceUnitInOPP,
611 double MinTTUVBlank[],
613 unsigned int DETBufferSizeY[],
615 double BytePerPixelDETY[],
616 double SwathWidthY[],
619 double NetDCCRateLuma[],
620 double NetDCCRateChroma[],
621 double DCCFractionOfZeroSizeRequestsLuma[],
622 double DCCFractionOfZeroSizeRequestsChroma[],
627 enum scan_direction_class SourceScan[],
628 int BlockHeight256BytesY[],
629 int BlockWidth256BytesY[],
630 int BlockHeight256BytesC[],
631 int BlockWidth256BytesC[],
632 int DCCYMaxUncompressedBlock[],
633 int DCCCMaxUncompressedBlock[],
636 bool WritebackEnable[],
637 double ReadBandwidthPlaneLuma[],
638 double ReadBandwidthPlaneChroma[],
639 double meta_row_bw[],
640 double dpte_row_bw[],
641 double *StutterEfficiencyNotIncludingVBlank,
642 double *StutterEfficiency,
643 int *NumberOfStutterBurstsPerFrame,
644 double *Z8StutterEfficiencyNotIncludingVBlank,
645 double *Z8StutterEfficiency,
646 int *Z8NumberOfStutterBurstsPerFrame,
647 double *StutterPeriod);
/*
 * Forward declaration. Returns void. Inputs and results are both passed as
 * arrays, so the split cannot be read off the signature alone; the only
 * plain pointer is ViewportSizeSupport (presumably an overall pass/fail
 * output alongside the ViewportSizeSupportPerPlane array -- confirm against
 * the definition).
 */
649 static void CalculateSwathAndDETConfiguration(
651 int NumberOfActivePlanes,
652 unsigned int DETBufferSizeInKByte,
653 double MaximumSwathWidthLuma[],
654 double MaximumSwathWidthChroma[],
655 enum scan_direction_class SourceScan[],
656 enum source_format_class SourcePixelFormat[],
657 enum dm_swizzle_mode SurfaceTiling[],
659 int ViewportHeight[],
662 int SurfaceHeightY[],
663 int SurfaceHeightC[],
664 int Read256BytesBlockHeightY[],
665 int Read256BytesBlockHeightC[],
666 int Read256BytesBlockWidthY[],
667 int Read256BytesBlockWidthC[],
668 enum odm_combine_mode ODMCombineEnabled[],
669 int BlendingAndTiming[],
672 double BytePerPixDETY[],
673 double BytePerPixDETC[],
676 double HRatioChroma[],
678 int swath_width_luma_ub[],
679 int swath_width_chroma_ub[],
681 double SwathWidthChroma[],
684 unsigned int DETBufferSizeY[],
685 unsigned int DETBufferSizeC[],
686 bool ViewportSizeSupportPerPlane[],
687 bool *ViewportSizeSupport);
/*
 * Forward declaration. Returns void; every argument after
 * NumberOfActivePlanes is an array. The trailing SwathWidth*,
 * MaximumSwathHeight* and swath_width_*_ub arrays are presumably the
 * results -- confirm against the definition.
 */
688 static void CalculateSwathWidth(
690 int NumberOfActivePlanes,
691 enum source_format_class SourcePixelFormat[],
692 enum scan_direction_class SourceScan[],
694 int ViewportHeight[],
697 int SurfaceHeightY[],
698 int SurfaceHeightC[],
699 enum odm_combine_mode ODMCombineEnabled[],
702 int Read256BytesBlockHeightY[],
703 int Read256BytesBlockHeightC[],
704 int Read256BytesBlockWidthY[],
705 int Read256BytesBlockWidthC[],
706 int BlendingAndTiming[],
710 double SwathWidthSingleDPPY[],
711 double SwathWidthSingleDPPC[],
712 double SwathWidthY[],
713 double SwathWidthC[],
714 int MaximumSwathHeightY[],
715 int MaximumSwathHeightC[],
716 int swath_width_luma_ub[],
717 int swath_width_chroma_ub[]);
/*
 * Forward declaration. Returns a double. Shares most of its parameter list
 * with CalculateExtraLatencyBytes() and additionally takes
 * RoundTripPingLatencyCycles.
 */
719 static double CalculateExtraLatency(
720 int RoundTripPingLatencyCycles,
723 int TotalNumberOfActiveDPP,
724 int PixelChunkSizeInKByte,
725 int TotalNumberOfDCCActiveDPP,
730 int NumberOfActivePlanes,
732 int dpte_group_bytes[],
733 double HostVMInefficiencyFactor,
734 double HostVMMinPageSize,
735 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration. Returns a double; takes DPP counts, chunk size, the
 * dpte_group_bytes array and host-VM parameters by value.
 */
737 static double CalculateExtraLatencyBytes(
739 int TotalNumberOfActiveDPP,
740 int PixelChunkSizeInKByte,
741 int TotalNumberOfDCCActiveDPP,
745 int NumberOfActivePlanes,
747 int dpte_group_bytes[],
748 double HostVMInefficiencyFactor,
749 double HostVMMinPageSize,
750 int HostVMMaxNonCachedPageTableLevels);
/*
 * Forward declaration. Returns a double; takes three urgent-latency values,
 * an adjustment enable flag with its two fabric-clock adjustment terms, and
 * a fabric clock, all by value.
 */
752 static double CalculateUrgentLatency(
753 double UrgentLatencyPixelDataOnly,
754 double UrgentLatencyPixelMixedWithVMData,
755 double UrgentLatencyVMDataOnly,
756 bool DoUrgentLatencyAdjustment,
757 double UrgentLatencyAdjustmentFabricClockComponent,
758 double UrgentLatencyAdjustmentFabricClockReference,
759 double FabricClockSingle);
/*
 * Forward declaration. Returns void; UnboundedRequestEnabled and
 * CompressedBufferSizeInkByte are passed by pointer, presumably as outputs.
 * Output is also a pointer (enum output_encoder_class *); whether it is an
 * input array or an output is not visible here -- confirm against the
 * definition.
 */
761 static void CalculateUnboundedRequestAndCompressedBufferSize(
762 unsigned int DETBufferSizeInKByte,
763 int ConfigReturnBufferSizeInKByte,
764 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
768 int CompressedBufferSegmentSizeInkByteFinal,
769 enum output_encoder_class *Output,
770 bool *UnboundedRequestEnabled,
771 int *CompressedBufferSizeInkByte);
/* Forward declaration. Returns bool; takes the requesting policy, active DPP count, a no-chroma flag and the output encoder class by value. */
773 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
/*
 * dml31_recalculate - run the calculation pipeline on mode_lib.
 *
 * Calls, in this order and each with mode_lib as the only argument:
 * ModeSupportAndSystemConfiguration(),
 * PixelClockAdjustmentForProgressiveToInterlaceUnit(),
 * DisplayPipeConfiguration(), and finally
 * DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation().
 * Returns nothing.
 */
775 void dml31_recalculate(struct display_mode_lib *mode_lib)
777 ModeSupportAndSystemConfiguration(mode_lib);
778 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
779 DisplayPipeConfiguration(mode_lib);
// Trace message emitted only in builds that define __DML_VBA_DEBUG__.
780 #ifdef __DML_VBA_DEBUG__
781 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
783 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - compute a DSC encoder delay, expressed in pixels.
 *
 * Visible steps: derive the initial transmit delay from a fixed rcModelSize
 * of 8192, BPP and pixelsPerClock; convert sliceWidth into cycles (w);
 * build the intermediate terms ix, ax and L; combine them with numSlices
 * into Delay; then scale Delay by 3 * pixelsPerClock into pixels.
 *
 * NOTE(review): BPP is used below but is not among the parameters listed
 * here, and Output is not referenced in the lines shown -- confirm the full
 * signature. The return statement is not shown either; presumably the
 * function returns pixels -- confirm.
 */
786 static unsigned int dscceComputeDelay(
789 unsigned int sliceWidth,
790 unsigned int numSlices,
791 enum output_format_class pixelFormat,
792 enum output_encoder_class Output)
794 // valid bpc = source bits per component in the set of {8, 10, 12}
795 // valid bpp = increments of 1/16 of a bit
796 // min = 6/7/8 in N420/N422/444, respectively
797 // max = such that compression is 1:1
798 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
799 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
800 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
803 unsigned int rcModelSize = 8192;
805 // N422/N420 operate at 2 pixels per clock
806 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
// pixelsPerClock is chosen per pixel format by the chain below.
808 if (pixelFormat == dm_420)
810 else if (pixelFormat == dm_444)
812 else if (pixelFormat == dm_n422)
814 // #all other modes operate at 1 pixel per clock
818 //initial transmit delay as per PPS
819 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
829 //divide by pixel per cycle to compute slice width as seen by DSC
830 w = sliceWidth / pixelsPerClock;
832 //422 mode has an additional cycle of delay
// NOTE(review): the condition tests dm_420, dm_444 and dm_n422; the value
// assigned in each branch is not shown here -- confirm it matches the
// comment above.
833 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
838 //main calculation for the dscce
839 ix = initalXmitDelay + 45;
844 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
845 L = (ax + wx - 1) / wx;
846 if ((ix % w) == 0 && P != 0)
850 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
852 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
853 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - build up a fixed DSC delay for the given pixel format.
 *
 * Delay starts at 0. The code branches on pixelFormat (dm_420, dm_n422 and,
 * presumably, a final branch for every other format); the stage comments in
 * each branch name the contributors: input deserializer, input/output CDC
 * FIFOs, CDC uncertainty and output serializer.
 *
 * NOTE(review): the per-stage increments and the return statement are not
 * shown here, and Output is not referenced in the lines shown; presumably
 * the function returns Delay -- confirm.
 */
857 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
859 unsigned int Delay = 0;
861 if (pixelFormat == dm_420) {
866 // dscc - input deserializer
868 // dscc gets pixels every other cycle
870 // dscc - input cdc fifo
872 // dscc gets pixels every other cycle
874 // dscc - cdc uncertainty
876 // dscc - output cdc fifo
878 // dscc gets pixels every other cycle
880 // dscc - cdc uncertainty
882 // dscc - output serializer
886 } else if (pixelFormat == dm_n422) {
891 // dscc - input deserializer
893 // dscc - input cdc fifo
895 // dscc - cdc uncertainty
897 // dscc - output cdc fifo
899 // dscc - cdc uncertainty
901 // dscc - output serializer
910 // dscc - input deserializer
912 // dscc - input cdc fifo
914 // dscc - cdc uncertainty
916 // dscc - output cdc fifo
918 // dscc - output serializer
920 // dscc - cdc uncertainty
929 static bool CalculatePrefetchSchedule(
930 struct display_mode_lib *mode_lib,
931 double HostVMInefficiencyFactor,
933 unsigned int DSCDelay,
934 double DPPCLKDelaySubtotalPlusCNVCFormater,
935 double DPPCLKDelaySCL,
936 double DPPCLKDelaySCLLBOnly,
937 double DPPCLKDelayCNVCCursor,
938 double DISPCLKDelaySubtotal,
939 unsigned int DPP_RECOUT_WIDTH,
940 enum output_format_class OutputFormat,
941 unsigned int MaxInterDCNTileRepeaters,
942 unsigned int VStartup,
943 unsigned int MaxVStartup,
944 unsigned int GPUVMPageTableLevels,
947 unsigned int HostVMMaxNonCachedPageTableLevels,
948 double HostVMMinPageSize,
949 bool DynamicMetadataEnable,
950 bool DynamicMetadataVMEnabled,
951 int DynamicMetadataLinesBeforeActiveRequired,
952 unsigned int DynamicMetadataTransmittedBytes,
953 double UrgentLatency,
954 double UrgentExtraLatency,
956 unsigned int PDEAndMetaPTEBytesFrame,
957 unsigned int MetaRowByte,
958 unsigned int PixelPTEBytesPerRow,
959 double PrefetchSourceLinesY,
960 unsigned int SwathWidthY,
961 double VInitPreFillY,
962 unsigned int MaxNumSwathY,
963 double PrefetchSourceLinesC,
964 unsigned int SwathWidthC,
965 double VInitPreFillC,
966 unsigned int MaxNumSwathC,
967 int swath_width_luma_ub,
968 int swath_width_chroma_ub,
969 unsigned int SwathHeightY,
970 unsigned int SwathHeightC,
972 double *DSTXAfterScaler,
973 double *DSTYAfterScaler,
974 double *DestinationLinesForPrefetch,
975 double *PrefetchBandwidth,
976 double *DestinationLinesToRequestVMInVBlank,
977 double *DestinationLinesToRequestRowInVBlank,
978 double *VRatioPrefetchY,
979 double *VRatioPrefetchC,
980 double *RequiredPrefetchPixDataBWLuma,
981 double *RequiredPrefetchPixDataBWChroma,
982 bool *NotEnoughTimeForDynamicMetadata,
984 double *prefetch_vmrow_bw,
988 int *VUpdateOffsetPix,
989 double *VUpdateWidthPix,
990 double *VReadyOffsetPix)
992 bool MyError = false;
993 unsigned int DPPCycles, DISPCLKCycles;
994 double DSTTotalPixelsAfterScaler;
996 double dst_y_prefetch_equ;
998 double prefetch_bw_oto;
1001 double Tvm_oto_lines;
1002 double Tr0_oto_lines;
1003 double dst_y_prefetch_oto;
1004 double TimeForFetchingMetaPTE = 0;
1005 double TimeForFetchingRowInVBlank = 0;
1006 double LinesToRequestPrefetchPixelData = 0;
1007 unsigned int HostVMDynamicLevelsTrips;
1011 double Tvm_trips_rounded;
1012 double Tr0_trips_rounded;
1014 double Tpre_rounded;
1015 double prefetch_bw_equ;
1021 double prefetch_sw_bytes;
1024 int max_vratio_pre = 4;
1026 double Tsw_est1 = 0;
1027 double Tsw_est3 = 0;
1029 if (GPUVMEnable == true && HostVMEnable == true) {
1030 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1032 HostVMDynamicLevelsTrips = 0;
1034 #ifdef __DML_VBA_DEBUG__
1035 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
1037 CalculateVupdateAndDynamicMetadataParameters(
1038 MaxInterDCNTileRepeaters,
1041 myPipe->DCFCLKDeepSleep,
1045 DynamicMetadataTransmittedBytes,
1046 DynamicMetadataLinesBeforeActiveRequired,
1047 myPipe->InterlaceEnable,
1048 myPipe->ProgressiveToInterlaceUnitInOPP,
1057 LineTime = myPipe->HTotal / myPipe->PixelClock;
1058 trip_to_mem = UrgentLatency;
1059 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1061 #ifdef __DML_VBA_ALLOW_DELTA__
1062 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1064 if (DynamicMetadataVMEnabled == true) {
1066 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1068 *Tdmdl = TWait + UrgentExtraLatency;
1071 #ifdef __DML_VBA_ALLOW_DELTA__
1072 if (DynamicMetadataEnable == false) {
1077 if (DynamicMetadataEnable == true) {
1078 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1079 *NotEnoughTimeForDynamicMetadata = true;
1080 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1081 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1082 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1083 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1084 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1086 *NotEnoughTimeForDynamicMetadata = false;
1089 *NotEnoughTimeForDynamicMetadata = false;
1092 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1094 if (myPipe->ScalerEnabled)
1095 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1097 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1099 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1101 DISPCLKCycles = DISPCLKDelaySubtotal;
1103 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1106 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1108 #ifdef __DML_VBA_DEBUG__
1109 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1110 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1111 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1112 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1113 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1114 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1115 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1116 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1119 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1121 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1122 *DSTYAfterScaler = 1;
1124 *DSTYAfterScaler = 0;
1126 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1127 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1128 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1130 #ifdef __DML_VBA_DEBUG__
1131 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1136 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1137 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1138 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1140 #ifdef __DML_VBA_ALLOW_DELTA__
1141 if (!myPipe->DCCEnable) {
1143 Tr0_trips_rounded = 0.0;
1149 Tvm_trips_rounded = 0.0;
1153 if (GPUVMPageTableLevels >= 3) {
1154 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1158 } else if (!myPipe->DCCEnable) {
1161 *Tno_bw = LineTime / 4;
1164 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1165 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1167 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1169 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1170 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1172 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
1173 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1174 Tsw_oto = Lsw_oto * LineTime;
1176 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1178 #ifdef __DML_VBA_DEBUG__
1179 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1180 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1181 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1182 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1183 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1184 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1187 if (GPUVMEnable == true)
1188 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1190 Tvm_oto = LineTime / 4.0;
1192 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1193 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1197 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1200 #ifdef __DML_VBA_DEBUG__
1201 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1202 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1203 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1204 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1205 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1206 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1207 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1208 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1209 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1212 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1213 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1214 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1215 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1216 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1217 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1219 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1221 if (prefetch_sw_bytes < dep_bytes)
1222 prefetch_sw_bytes = 2 * dep_bytes;
1224 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1225 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1226 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1227 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1228 dml_print("DML: LineTime: %f\n", LineTime);
1229 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1231 dml_print("DML: LineTime: %f\n", LineTime);
1232 dml_print("DML: VStartup: %d\n", VStartup);
1233 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1234 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1235 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1236 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1237 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1238 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1239 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1240 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1241 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1242 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1243 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1245 *PrefetchBandwidth = 0;
1246 *DestinationLinesToRequestVMInVBlank = 0;
1247 *DestinationLinesToRequestRowInVBlank = 0;
1248 *VRatioPrefetchY = 0;
1249 *VRatioPrefetchC = 0;
1250 *RequiredPrefetchPixDataBWLuma = 0;
1251 if (dst_y_prefetch_equ > 1) {
1252 double PrefetchBandwidth1;
1253 double PrefetchBandwidth2;
1254 double PrefetchBandwidth3;
1255 double PrefetchBandwidth4;
1257 if (Tpre_rounded - *Tno_bw > 0) {
1258 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1259 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1260 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1262 PrefetchBandwidth1 = 0;
1265 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1266 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1267 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1270 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1271 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1273 PrefetchBandwidth2 = 0;
1275 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1276 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1277 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1278 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1280 PrefetchBandwidth3 = 0;
1283 #ifdef __DML_VBA_DEBUG__
1284 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1285 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1286 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1288 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1289 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1290 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1293 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1294 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1296 PrefetchBandwidth4 = 0;
1303 if (PrefetchBandwidth1 > 0) {
1304 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1305 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1314 if (PrefetchBandwidth2 > 0) {
1315 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1316 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1325 if (PrefetchBandwidth3 > 0) {
1326 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1327 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1337 prefetch_bw_equ = PrefetchBandwidth1;
1338 } else if (Case2OK) {
1339 prefetch_bw_equ = PrefetchBandwidth2;
1340 } else if (Case3OK) {
1341 prefetch_bw_equ = PrefetchBandwidth3;
1343 prefetch_bw_equ = PrefetchBandwidth4;
1346 #ifdef __DML_VBA_DEBUG__
1347 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1348 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1349 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1350 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1353 if (prefetch_bw_equ > 0) {
1354 if (GPUVMEnable == true) {
1355 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1357 Tvm_equ = LineTime / 4;
1360 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1362 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1364 (LineTime - Tvm_equ) / 2,
1367 Tr0_equ = (LineTime - Tvm_equ) / 2;
1372 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1376 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1377 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1378 TimeForFetchingMetaPTE = Tvm_oto;
1379 TimeForFetchingRowInVBlank = Tr0_oto;
1380 *PrefetchBandwidth = prefetch_bw_oto;
1382 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1383 TimeForFetchingMetaPTE = Tvm_equ;
1384 TimeForFetchingRowInVBlank = Tr0_equ;
1385 *PrefetchBandwidth = prefetch_bw_equ;
1388 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1390 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1392 #ifdef __DML_VBA_ALLOW_DELTA__
1393 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1394 // See note above dated 5/30/2018
1395 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1396 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1398 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1401 #ifdef __DML_VBA_DEBUG__
1402 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1403 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1404 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1405 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1406 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1407 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1408 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1411 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1413 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1414 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1415 #ifdef __DML_VBA_DEBUG__
1416 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1417 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1418 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1420 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1421 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1422 *VRatioPrefetchY = dml_max(
1423 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1424 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1425 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1428 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1429 *VRatioPrefetchY = 0;
1431 #ifdef __DML_VBA_DEBUG__
1432 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1433 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1434 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1438 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1439 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1441 #ifdef __DML_VBA_DEBUG__
1442 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1443 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1444 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1446 if ((SwathHeightC > 4)) {
1447 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1448 *VRatioPrefetchC = dml_max(
1450 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1451 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1454 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1455 *VRatioPrefetchC = 0;
1457 #ifdef __DML_VBA_DEBUG__
1458 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1459 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1460 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1464 #ifdef __DML_VBA_DEBUG__
1465 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1466 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1467 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1470 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1472 #ifdef __DML_VBA_DEBUG__
1473 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1476 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1480 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1481 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1482 *VRatioPrefetchY = 0;
1483 *VRatioPrefetchC = 0;
1484 *RequiredPrefetchPixDataBWLuma = 0;
1485 *RequiredPrefetchPixDataBWChroma = 0;
1489 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1490 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1491 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1492 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1494 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1495 (double) LinesToRequestPrefetchPixelData * LineTime);
1496 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1497 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1498 (double) myPipe->HTotal)) * LineTime);
1499 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1500 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1501 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1502 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1503 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1507 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1511 double prefetch_vm_bw;
1512 double prefetch_row_bw;
1514 if (PDEAndMetaPTEBytesFrame == 0) {
1516 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1517 #ifdef __DML_VBA_DEBUG__
1518 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1519 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1520 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1521 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1523 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1524 #ifdef __DML_VBA_DEBUG__
1525 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1530 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1533 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1534 prefetch_row_bw = 0;
1535 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1536 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1538 #ifdef __DML_VBA_DEBUG__
1539 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1540 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1541 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1542 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1545 prefetch_row_bw = 0;
1547 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1550 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1554 *PrefetchBandwidth = 0;
1555 TimeForFetchingMetaPTE = 0;
1556 TimeForFetchingRowInVBlank = 0;
1557 *DestinationLinesToRequestVMInVBlank = 0;
1558 *DestinationLinesToRequestRowInVBlank = 0;
1559 *DestinationLinesForPrefetch = 0;
1560 LinesToRequestPrefetchPixelData = 0;
1561 *VRatioPrefetchY = 0;
1562 *VRatioPrefetchC = 0;
1563 *RequiredPrefetchPixDataBWLuma = 0;
1564 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * RoundToDFSGranularityUp - snap Clock onto the (VCOSpeed * 4) / N grid.
 *
 * The divider N is dml_floor(VCOSpeed * 4 / Clock, 1) and the function
 * returns VCOSpeed * 4 / N. Assuming dml_floor(x, 1) rounds down to a whole
 * number (TODO confirm against the helper), N <= VCOSpeed * 4 / Clock, so for
 * positive inputs with N != 0 the result is >= Clock.
 *
 * No guard is applied when the divider evaluates to 0.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
/*
 * RoundToDFSGranularityDown - snap Clock onto the (VCOSpeed * 4) / N grid.
 *
 * The divider N is dml_ceil(VCOSpeed * 4.0 / Clock, 1) and the function
 * returns VCOSpeed * 4 / N. Assuming dml_ceil(x, 1) rounds up to a whole
 * number (TODO confirm against the helper), N >= VCOSpeed * 4 / Clock, so for
 * positive inputs the result is <= Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
/*
 * CalculateDCCConfiguration - derive the DCC block-size outputs for the luma
 * and chroma planes.
 *
 * Steps visible below:
 *   1. Compute viewport limits from DETBufferSize, the 256-byte request
 *      heights and the bytes per pixel, and clamp the surface size to them.
 *   2. Compute full-swath byte counts for the horizontal and vertical cases
 *      and compare them against DETBufferSize to set the req128_* flags.
 *   3. Set the segment_order_* flags from bytes per pixel and TilingFormat.
 *   4. Combine (2) and (3) into RequestLuma / RequestChroma, depending on
 *      DCCProgrammingAssumesScanDirectionUnknown and ScanOrientation.
 *   5. Translate the request types into *MaxUncompressedBlock*,
 *      *MaxCompressedBlock* and *IndependentBlock*, then zero the outputs
 *      when DCCEnabled is not true (chroma also when BytePerPixelC == 0).
 *
 * All results are returned through the six output pointers; the function
 * itself returns nothing.
 *
 * NOTE(review): the embedded line numbers skip in places, so this listing
 * appears to omit some source lines. DCCEnabled, yuv420, swath_buf_size and
 * the horz_div_* / vert_div_* variables are used below with no declaration
 * visible here, and several else / brace lines are not shown - confirm
 * against the complete file before changing any logic.
 */
1580 static void CalculateDCCConfiguration(
1582 bool DCCProgrammingAssumesScanDirectionUnknown,
1583 enum source_format_class SourcePixelFormat,
1584 unsigned int SurfaceWidthLuma,
1585 unsigned int SurfaceWidthChroma,
1586 unsigned int SurfaceHeightLuma,
1587 unsigned int SurfaceHeightChroma,
1588 double DETBufferSize,
1589 unsigned int RequestHeight256ByteLuma,
1590 unsigned int RequestHeight256ByteChroma,
1591 enum dm_swizzle_mode TilingFormat,
1592 unsigned int BytePerPixelY,
1593 unsigned int BytePerPixelC,
1594 double BytePerPixelDETY,
1595 double BytePerPixelDETC,
1596 enum scan_direction_class ScanOrientation,
1597 unsigned int *MaxUncompressedBlockLuma,
1598 unsigned int *MaxUncompressedBlockChroma,
1599 unsigned int *MaxCompressedBlockLuma,
1600 unsigned int *MaxCompressedBlockChroma,
1601 unsigned int *IndependentBlockLuma,
1602 unsigned int *IndependentBlockChroma)
// Viewport limits derived from the swath buffer size (computed further down).
1611 double detile_buf_vp_horz_limit;
1612 double detile_buf_vp_vert_limit;
1614 int MAS_vp_horz_limit;
1615 int MAS_vp_vert_limit;
1616 int max_vp_horz_width;
1617 int max_vp_vert_height;
1618 int eff_surf_width_l;
1619 int eff_surf_width_c;
1620 int eff_surf_height_l;
1621 int eff_surf_height_c;
1623 int full_swath_bytes_horz_wc_l;
1624 int full_swath_bytes_horz_wc_c;
1625 int full_swath_bytes_vert_wc_l;
1626 int full_swath_bytes_vert_wc_c;
1627 int req128_horz_wc_l;
1628 int req128_horz_wc_c;
1629 int req128_vert_wc_l;
1630 int req128_vert_wc_c;
1631 int segment_order_horz_contiguous_luma;
1632 int segment_order_horz_contiguous_chroma;
1633 int segment_order_vert_contiguous_luma;
1634 int segment_order_vert_contiguous_chroma;
// Enumerators of the RequestType used by RequestLuma / RequestChroma below.
1637 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1639 RequestType RequestLuma;
1640 RequestType RequestChroma;
// yuv420 is 1 for the three 4:2:0 source formats and 0 otherwise.
1642 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
// Per-plane adjustments keyed on bytes per pixel and 64KB "s" tiling variants.
// NOTE(review): the statements guarded by these four conditions are not
// visible in this listing.
1648 if (BytePerPixelY == 1)
1650 if (BytePerPixelC == 1)
1652 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1654 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
// BytePerPixelC == 0: swath buffer is half the DET buffer minus 2 * 256 and
// only the luma term appears in the divisors.
1657 if (BytePerPixelC == 0) {
1658 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1659 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1660 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
// Second variant (presumably the else branch; the keyword is not visible
// here): 2 * 2 * 256 is subtracted and a chroma term, divided by
// (1 + yuv420), is added to each divisor.
1662 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1663 detile_buf_vp_horz_limit = (double) swath_buf_size
1664 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1665 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1666 detile_buf_vp_vert_limit = (double) swath_buf_size
1667 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
// dm_420_10 scales both limits by 1.5.
1670 if (SourcePixelFormat == dm_420_10) {
1671 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1672 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
// Subtract 1, then apply dml_floor with granularity 16 (presumably rounds
// down to a multiple of 16 - TODO confirm against the helper).
1675 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1676 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
// Fixed caps: width 3840 for dm_rgbe_alpha, otherwise 5760; height 2880 when
// BytePerPixelC > 0, otherwise 5760. The usable maximum is the smaller of the
// fixed cap and the detile-buffer limit.
1678 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1679 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1680 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1681 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
// Effective luma size is the surface size clamped to the maximum; chroma is
// the luma value divided by (1 + yuv420), i.e. halved for 4:2:0.
1682 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1683 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1684 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1685 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
// Full-swath byte counts for the horizontal and vertical cases; the chroma
// counts are 0 when BytePerPixelC is 0.
1687 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1688 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1689 if (BytePerPixelC > 0) {
1690 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1691 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1693 full_swath_bytes_horz_wc_c = 0;
1694 full_swath_bytes_vert_wc_c = 0;
// dm_420_10: scale by 2 / 3 (integer arithmetic) and apply dml_ceil with
// granularity 256 (presumably rounds up to a multiple of 256 - TODO confirm).
1697 if (SourcePixelFormat == dm_420_10) {
1698 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1699 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1700 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1701 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
// Horizontal case: both req128 flags stay 0 when two luma plus two chroma
// swaths fit in DETBufferSize. Otherwise one plane is flagged depending on
// the luma/chroma ratio (threshold 1.5) and whether the reduced total fits;
// the remaining assignments set both flags to 1.
1704 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1705 req128_horz_wc_l = 0;
1706 req128_horz_wc_c = 0;
1707 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1708 req128_horz_wc_l = 0;
1709 req128_horz_wc_c = 1;
1710 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1711 req128_horz_wc_l = 1;
1712 req128_horz_wc_c = 0;
1714 req128_horz_wc_l = 1;
1715 req128_horz_wc_c = 1;
// Vertical case: same decision ladder using the vertical swath byte counts.
1718 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1719 req128_vert_wc_l = 0;
1720 req128_vert_wc_c = 0;
1721 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1722 req128_vert_wc_l = 0;
1723 req128_vert_wc_c = 1;
1724 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1725 req128_vert_wc_l = 1;
1726 req128_vert_wc_c = 0;
1728 req128_vert_wc_l = 1;
1729 req128_vert_wc_c = 1;
// Segment-order flags for each plane and direction, selected from the plane's
// bytes per pixel and TilingFormat.
1732 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1733 segment_order_horz_contiguous_luma = 0;
1735 segment_order_horz_contiguous_luma = 1;
1737 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1738 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1739 segment_order_vert_contiguous_luma = 0;
1741 segment_order_vert_contiguous_luma = 1;
1743 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1744 segment_order_horz_contiguous_chroma = 0;
1746 segment_order_horz_contiguous_chroma = 1;
1748 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1749 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1750 segment_order_vert_contiguous_chroma = 0;
1752 segment_order_vert_contiguous_chroma = 1;
// Request type per plane. With the scan direction assumed unknown, both the
// horizontal and vertical flags are consulted: 256-byte requests need both
// req128 flags to be 0, and a flagged direction whose segment order is 0
// selects the non-contiguous 128-byte type.
1755 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1756 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1757 RequestLuma = REQ_256Bytes;
1758 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1759 RequestLuma = REQ_128BytesNonContiguous;
1761 RequestLuma = REQ_128BytesContiguous;
1763 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1764 RequestChroma = REQ_256Bytes;
1765 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1766 RequestChroma = REQ_128BytesNonContiguous;
1768 RequestChroma = REQ_128BytesContiguous;
// Known scan direction other than dm_vert: only the horizontal flags are used.
1770 } else if (ScanOrientation != dm_vert) {
1771 if (req128_horz_wc_l == 0) {
1772 RequestLuma = REQ_256Bytes;
1773 } else if (segment_order_horz_contiguous_luma == 0) {
1774 RequestLuma = REQ_128BytesNonContiguous;
1776 RequestLuma = REQ_128BytesContiguous;
1778 if (req128_horz_wc_c == 0) {
1779 RequestChroma = REQ_256Bytes;
1780 } else if (segment_order_horz_contiguous_chroma == 0) {
1781 RequestChroma = REQ_128BytesNonContiguous;
1783 RequestChroma = REQ_128BytesContiguous;
// Remaining case (presumably the dm_vert branch; its else line is not visible
// here): only the vertical flags are used.
1786 if (req128_vert_wc_l == 0) {
1787 RequestLuma = REQ_256Bytes;
1788 } else if (segment_order_vert_contiguous_luma == 0) {
1789 RequestLuma = REQ_128BytesNonContiguous;
1791 RequestLuma = REQ_128BytesContiguous;
1793 if (req128_vert_wc_c == 0) {
1794 RequestChroma = REQ_256Bytes;
1795 } else if (segment_order_vert_contiguous_chroma == 0) {
1796 RequestChroma = REQ_128BytesNonContiguous;
1798 RequestChroma = REQ_128BytesContiguous;
// Luma outputs (uncompressed / compressed / independent block):
// REQ_256Bytes -> 256 / 256 / 0, REQ_128BytesContiguous -> 256 / 128 / 128,
// remaining assignments -> 256 / 64 / 64.
1802 if (RequestLuma == REQ_256Bytes) {
1803 *MaxUncompressedBlockLuma = 256;
1804 *MaxCompressedBlockLuma = 256;
1805 *IndependentBlockLuma = 0;
1806 } else if (RequestLuma == REQ_128BytesContiguous) {
1807 *MaxUncompressedBlockLuma = 256;
1808 *MaxCompressedBlockLuma = 128;
1809 *IndependentBlockLuma = 128;
1811 *MaxUncompressedBlockLuma = 256;
1812 *MaxCompressedBlockLuma = 64;
1813 *IndependentBlockLuma = 64;
// Chroma outputs use the same mapping as luma.
1816 if (RequestChroma == REQ_256Bytes) {
1817 *MaxUncompressedBlockChroma = 256;
1818 *MaxCompressedBlockChroma = 256;
1819 *IndependentBlockChroma = 0;
1820 } else if (RequestChroma == REQ_128BytesContiguous) {
1821 *MaxUncompressedBlockChroma = 256;
1822 *MaxCompressedBlockChroma = 128;
1823 *IndependentBlockChroma = 128;
1825 *MaxUncompressedBlockChroma = 256;
1826 *MaxCompressedBlockChroma = 64;
1827 *IndependentBlockChroma = 64;
// Final override: chroma outputs are zeroed when DCCEnabled is not true or
// BytePerPixelC is 0; luma outputs are zeroed when DCCEnabled is not true.
1830 if (DCCEnabled != true || BytePerPixelC == 0) {
1831 *MaxUncompressedBlockChroma = 0;
1832 *MaxCompressedBlockChroma = 0;
1833 *IndependentBlockChroma = 0;
1836 if (DCCEnabled != true) {
1837 *MaxUncompressedBlockLuma = 0;
1838 *MaxCompressedBlockLuma = 0;
1839 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute the prefetch source line count.
 *
 * Writes *VInitPreFill and *MaxNumSwath, and returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath.
 * Reads IgnoreViewportPositioning from mode_lib->vba to choose between the
 * two swath formulas below.
 *
 * NOTE(review): VRatio, vtaps and Interlace are used in the body but do not
 * appear in the visible parameter list; the embedded line numbers skip
 * 1845-1847, so this listing presumably omits them. Several else / brace /
 * #endif lines are likewise not shown - confirm against the complete file.
 */
1843 static double CalculatePrefetchSourceLines(
1844 struct display_mode_lib *mode_lib,
1848 bool ProgressiveToInterlaceUnitInOPP,
1849 unsigned int SwathHeight,
1850 unsigned int ViewportYStart,
1851 double *VInitPreFill,
1852 unsigned int *MaxNumSwath)
1854 struct vba_vars_st *v = &mode_lib->vba;
1855 unsigned int MaxPartialSwath;
// *VInitPreFill is dml_floor((VRatio + vtaps + 1) / 2.0, 1); the second form
// additionally adds Interlace * 0.5 * VRatio to the numerator.
1857 if (ProgressiveToInterlaceUnitInOPP)
1858 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1860 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
// Viewport positioning not ignored: swath count is
// dml_ceil((*VInitPreFill - 1) / SwathHeight, 1) + 1, and the partial swath is
// taken modulo SwathHeight with a lower bound of 1.
1862 if (!v->IgnoreViewportPositioning) {
1864 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1866 if (*VInitPreFill > 1.0)
1867 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1869 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1870 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
// Second variant (presumably the else branch; the keyword is not visible
// here): a non-zero ViewportYStart only triggers a warning print, and the
// formulas use *VInitPreFill without the extra offset of 1.
1874 if (ViewportYStart != 0)
1875 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1877 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1879 if (*VInitPreFill > 1.0)
1880 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1882 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
// Debug-only dump of the inputs and results (compiled in when
// __DML_VBA_DEBUG__ is defined).
1885 #ifdef __DML_VBA_DEBUG__
1886 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1887 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1888 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1889 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1890 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1891 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1892 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1893 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1894 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
// Whole swaths plus the partial swath, converted to double on return.
1896 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
/*
 * CalculateVMAndRowBytes - compute the meta-request, PTE-request and
 * page-directory byte counts for a single surface plane.
 *
 * Values written through the pointer arguments:
 *   MetaRequestHeight/Width   8 * BlockHeight256Bytes / 8 * BlockWidth256Bytes
 *   meta_row_height/width,
 *   MetaRowByte               meta row dimensions and bytes per meta row
 *   MetaPTEBytesFrame         per-frame meta PTE bytes (0 when GPUVMEnable or
 *                             DCCEnable is not true)
 *   MacroTileWidth            MacroTileSizeBytes / BytePerPixel / MacroTileHeight
 *   DPDE0BytesFrame           per-frame DPDE0 bytes (0 unless GPUVMEnable and
 *                             v->GPUVMMaxPageTableLevels > 1)
 *   PixelPTEReqWidth/Height,
 *   PTERequestSize            PTE request shape and request size (64 or 128)
 *   dpte_row_height,
 *   dpte_row_width_ub,
 *   PixelPTEBytesPerRow       PTE row dimensions and PTE bytes per row
 *   PTEBufferSizeNotExceeded  result of comparing PixelPTEBytesPerRow against
 *                             64 * PTEBufferSizeInRequests
 *   vm_group_bytes,
 *   dpte_group_bytes          512, 2048 or 0 depending on HostVM/GPUVM enablement
 *
 * Return: PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame +
 * *DPDE0BytesFrame + ExtraDPDEBytesFrame, multiplied by
 * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * SourcePixelFormat is not referenced anywhere in the visible body.
 *
 * NOTE(review): this listing carries embedded source line numbers with gaps.
 * GPUVMEnable, HostVMEnable, DCCEnable and Pitch are read below but are not
 * declared on any visible line - presumably they are parameters on the
 * skipped lines (1901, 1910, 1911, 1916); confirm against the full source.
 * Brace / else / #endif lines are likewise not visible here.
 */
1899 static unsigned int CalculateVMAndRowBytes(
1900 struct display_mode_lib *mode_lib,
1902 unsigned int BlockHeight256Bytes,
1903 unsigned int BlockWidth256Bytes,
1904 enum source_format_class SourcePixelFormat,
1905 unsigned int SurfaceTiling,
1906 unsigned int BytePerPixel,
1907 enum scan_direction_class ScanDirection,
1908 unsigned int SwathWidth,
1909 unsigned int ViewportHeight,
1912 unsigned int HostVMMaxNonCachedPageTableLevels,
1913 unsigned int GPUVMMinPageSize,
1914 unsigned int HostVMMinPageSize,
1915 unsigned int PTEBufferSizeInRequests,
1917 unsigned int DCCMetaPitch,
1918 unsigned int *MacroTileWidth,
1919 unsigned int *MetaRowByte,
1920 unsigned int *PixelPTEBytesPerRow,
1921 bool *PTEBufferSizeNotExceeded,
1922 int *dpte_row_width_ub,
1923 unsigned int *dpte_row_height,
1924 unsigned int *MetaRequestWidth,
1925 unsigned int *MetaRequestHeight,
1926 unsigned int *meta_row_width,
1927 unsigned int *meta_row_height,
1928 int *vm_group_bytes,
1929 unsigned int *dpte_group_bytes,
1930 unsigned int *PixelPTEReqWidth,
1931 unsigned int *PixelPTEReqHeight,
1932 unsigned int *PTERequestSize,
1933 int *DPDE0BytesFrame,
1934 int *MetaPTEBytesFrame)
1936 struct vba_vars_st *v = &mode_lib->vba;
1937 unsigned int MPDEBytesFrame;
1938 unsigned int DCCMetaSurfaceBytes;
1939 unsigned int MacroTileSizeBytes;
1940 unsigned int MacroTileHeight;
1941 unsigned int ExtraDPDEBytesFrame;
1942 unsigned int PDEAndMetaPTEBytesFrame;
1943 unsigned int PixelPTEReqHeightPTEs = 0;
1944 unsigned int HostVMDynamicLevels = 0;
1945 double FractionOfPTEReturnDrop;
/*
 * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled; it is
 * then HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (clamped at 0)
 * according to which HostVMMinPageSize range applies.
 */
1947 if (GPUVMEnable == true && HostVMEnable == true) {
1948 if (HostVMMinPageSize < 2048) {
1949 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1950 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1951 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1953 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
/*
 * Meta request is 8x the 256-byte block in each dimension. The meta row
 * height/width use the request height/width directly for non-vertical scan
 * and swap them in the other arm.
 */
1957 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1958 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1959 if (ScanDirection != dm_vert) {
1960 *meta_row_height = *MetaRequestHeight;
1961 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1962 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1964 *meta_row_height = *MetaRequestWidth;
1965 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1966 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1968 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1969 if (GPUVMEnable == true) {
1970 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1971 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1973 *MetaPTEBytesFrame = 0;
/* Without DCC no meta PTE bytes are counted. */
1977 if (DCCEnable != true) {
1978 *MetaPTEBytesFrame = 0;
/* Macro tile: 256 bytes / one block high for linear surfaces, 64 KiB / 16 blocks high in the other assignment pair. */
1983 if (SurfaceTiling == dm_sw_linear) {
1984 MacroTileSizeBytes = 256;
1985 MacroTileHeight = BlockHeight256Bytes;
1987 MacroTileSizeBytes = 65536;
1988 MacroTileHeight = 16 * BlockHeight256Bytes;
1990 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
/* DPDE0 / extra PDE bytes are only counted with GPUVM and more than one page-table level. */
1992 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1993 if (ScanDirection != dm_vert) {
1994 *DPDE0BytesFrame = 64
1996 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2000 *DPDE0BytesFrame = 64
2002 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
2006 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
2008 *DPDE0BytesFrame = 0;
2009 ExtraDPDEBytesFrame = 0;
2012 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2014 #ifdef __DML_VBA_DEBUG__
2015 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2016 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2017 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2018 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2019 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
/* With HostVM the frame byte count is scaled by (1 + 8 * HostVMDynamicLevels). */
2022 if (HostVMEnable == true) {
2023 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2025 #ifdef __DML_VBA_DEBUG__
2026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
/* Select the PTE request shape and size from the tiling mode, macro tile size and GPUVMMinPageSize. */
2029 if (SurfaceTiling == dm_sw_linear) {
2030 PixelPTEReqHeightPTEs = 1;
2031 *PixelPTEReqHeight = 1;
2032 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
2033 *PTERequestSize = 64;
2034 FractionOfPTEReturnDrop = 0;
/*
 * MacroTileSizeBytes is only ever assigned 256 or 65536 above, so this
 * 4096-byte branch does not appear reachable from the visible code.
 */
2035 } else if (MacroTileSizeBytes == 4096) {
2036 PixelPTEReqHeightPTEs = 1;
2037 *PixelPTEReqHeight = MacroTileHeight;
2038 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2039 *PTERequestSize = 64;
2040 if (ScanDirection != dm_vert)
2041 FractionOfPTEReturnDrop = 0;
/* Floating-point literals are required: the integer expression 7 / 8 evaluates to 0 in C. */
2043 FractionOfPTEReturnDrop = 7.0 / 8.0;
2044 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
2045 PixelPTEReqHeightPTEs = 16;
2046 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2047 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2048 *PTERequestSize = 128;
2049 FractionOfPTEReturnDrop = 0;
2051 PixelPTEReqHeightPTEs = 1;
2052 *PixelPTEReqHeight = MacroTileHeight;
2053 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2054 *PTERequestSize = 64;
2055 FractionOfPTEReturnDrop = 0;
/*
 * PTE row geometry. Linear surfaces derive the row height from how many
 * requests fit in the PTE buffer (capped at 128); the other two cases use
 * the request height, or min(request width, macro tile width).
 */
2058 if (SurfaceTiling == dm_sw_linear) {
2059 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2060 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2061 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2062 } else if (ScanDirection != dm_vert) {
2063 *dpte_row_height = *PixelPTEReqHeight;
2064 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2065 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2067 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2068 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2069 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
/* Compare the row's PTE bytes, less the dropped fraction, against 64 bytes per buffered request. */
2072 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2073 *PTEBufferSizeNotExceeded = true;
2075 *PTEBufferSizeNotExceeded = false;
/* Without GPUVM there are no PTE bytes per row and the buffer check is forced to pass. */
2078 if (GPUVMEnable != true) {
2079 *PixelPTEBytesPerRow = 0;
2080 *PTEBufferSizeNotExceeded = true;
2083 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2085 if (HostVMEnable == true) {
2086 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
/* Group sizes: 512/512 with HostVM; with GPUVM only, 2048 for VM and 512 or 2048 for DPTE; 0/0 in the last assignment pair. */
2089 if (HostVMEnable == true) {
2090 *vm_group_bytes = 512;
2091 *dpte_group_bytes = 512;
2092 } else if (GPUVMEnable == true) {
2093 *vm_group_bytes = 2048;
2094 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2095 *dpte_group_bytes = 512;
2097 *dpte_group_bytes = 2048;
2100 *vm_group_bytes = 0;
2101 *dpte_group_bytes = 0;
2103 return PDEAndMetaPTEBytesFrame;
2106 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2108 struct vba_vars_st *v = &mode_lib->vba;
2110 double HostVMInefficiencyFactor = 1.0;
2111 bool NoChromaPlanes = true;
2113 double VMDataOnlyReturnBW;
2114 double MaxTotalRDBandwidth = 0;
2115 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2117 v->WritebackDISPCLK = 0.0;
2118 v->DISPCLKWithRamping = 0;
2119 v->DISPCLKWithoutRamping = 0;
2120 v->GlobalDPPCLK = 0.0;
2121 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2123 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2124 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2125 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2126 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2127 if (v->HostVMEnable != true) {
2128 v->ReturnBW = dml_min(
2129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2132 v->ReturnBW = dml_min(
2133 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2134 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2137 /* End DAL custom code */
2139 // DISPCLK and DPPCLK Calculation
2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2142 if (v->WritebackEnable[k]) {
2143 v->WritebackDISPCLK = dml_max(
2144 v->WritebackDISPCLK,
2145 dml31_CalculateWriteBackDISPCLK(
2146 v->WritebackPixelFormat[k],
2148 v->WritebackHRatio[k],
2149 v->WritebackVRatio[k],
2150 v->WritebackHTaps[k],
2151 v->WritebackVTaps[k],
2152 v->WritebackSourceWidth[k],
2153 v->WritebackDestinationWidth[k],
2155 v->WritebackLineBufferSize));
2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2160 if (v->HRatio[k] > 1) {
2161 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2162 v->MaxDCHUBToPSCLThroughput,
2163 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2165 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2168 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2170 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2171 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2173 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2174 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2177 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2178 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2179 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2180 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2182 if (v->HRatioChroma[k] > 1) {
2183 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2184 v->MaxDCHUBToPSCLThroughput,
2185 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2187 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2189 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2191 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2192 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2195 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2196 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2199 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2204 if (v->BlendingAndTiming[k] != k)
2206 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2207 v->DISPCLKWithRamping = dml_max(
2208 v->DISPCLKWithRamping,
2209 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2210 * (1 + v->DISPCLKRampingMargin / 100));
2211 v->DISPCLKWithoutRamping = dml_max(
2212 v->DISPCLKWithoutRamping,
2213 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2214 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2215 v->DISPCLKWithRamping = dml_max(
2216 v->DISPCLKWithRamping,
2217 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2218 * (1 + v->DISPCLKRampingMargin / 100));
2219 v->DISPCLKWithoutRamping = dml_max(
2220 v->DISPCLKWithoutRamping,
2221 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2223 v->DISPCLKWithRamping = dml_max(
2224 v->DISPCLKWithRamping,
2225 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2226 v->DISPCLKWithoutRamping = dml_max(
2227 v->DISPCLKWithoutRamping,
2228 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2232 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2233 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2235 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2236 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2237 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2238 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2239 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2240 v->DISPCLKDPPCLKVCOSpeed);
2241 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2242 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2243 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2244 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2246 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2248 v->DISPCLK = v->DISPCLK_calculated;
2249 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2252 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2253 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2255 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2257 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2258 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2262 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2265 // Urgent and B P-State/DRAM Clock Change Watermark
2266 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2267 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2270 CalculateBytePerPixelAnd256BBlockSizes(
2271 v->SourcePixelFormat[k],
2272 v->SurfaceTiling[k],
2273 &v->BytePerPixelY[k],
2274 &v->BytePerPixelC[k],
2275 &v->BytePerPixelDETY[k],
2276 &v->BytePerPixelDETC[k],
2277 &v->BlockHeight256BytesY[k],
2278 &v->BlockHeight256BytesC[k],
2279 &v->BlockWidth256BytesY[k],
2280 &v->BlockWidth256BytesC[k]);
2283 CalculateSwathWidth(
2285 v->NumberOfActivePlanes,
2286 v->SourcePixelFormat,
2294 v->ODMCombineEnabled,
2297 v->BlockHeight256BytesY,
2298 v->BlockHeight256BytesC,
2299 v->BlockWidth256BytesY,
2300 v->BlockWidth256BytesC,
2301 v->BlendingAndTiming,
2305 v->SwathWidthSingleDPPY,
2306 v->SwathWidthSingleDPPC,
2311 v->swath_width_luma_ub,
2312 v->swath_width_chroma_ub);
2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2315 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2317 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2318 * v->VRatioChroma[k];
2319 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2322 // DCFCLK Deep Sleep
2323 CalculateDCFCLKDeepSleep(
2325 v->NumberOfActivePlanes,
2336 v->PSCL_THROUGHPUT_LUMA,
2337 v->PSCL_THROUGHPUT_CHROMA,
2339 v->ReadBandwidthPlaneLuma,
2340 v->ReadBandwidthPlaneChroma,
2342 &v->DCFCLKDeepSleep);
2345 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2346 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2347 v->DSCCLK_calculated[k] = 0.0;
2349 if (v->OutputFormat[k] == dm_420)
2350 v->DSCFormatFactor = 2;
2351 else if (v->OutputFormat[k] == dm_444)
2352 v->DSCFormatFactor = 1;
2353 else if (v->OutputFormat[k] == dm_n422)
2354 v->DSCFormatFactor = 2;
2356 v->DSCFormatFactor = 1;
2357 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2358 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2359 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2360 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2361 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2362 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2364 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2365 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2371 double BPP = v->OutputBpp[k];
2373 if (v->DSCEnabled[k] && BPP != 0) {
2374 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2375 v->DSCDelay[k] = dscceComputeDelay(
2376 v->DSCInputBitPerComponent[k],
2378 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2379 v->NumberOfDSCSlices[k],
2381 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2382 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2384 * (dscceComputeDelay(
2385 v->DSCInputBitPerComponent[k],
2387 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2388 v->NumberOfDSCSlices[k] / 2.0,
2390 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2393 * (dscceComputeDelay(
2394 v->DSCInputBitPerComponent[k],
2396 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2397 v->NumberOfDSCSlices[k] / 4.0,
2399 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2401 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2407 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2408 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2409 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2410 v->DSCDelay[k] = v->DSCDelay[j];
2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2414 unsigned int PDEAndMetaPTEBytesFrameY;
2415 unsigned int PixelPTEBytesPerRowY;
2416 unsigned int MetaRowByteY;
2417 unsigned int MetaRowByteC;
2418 unsigned int PDEAndMetaPTEBytesFrameC;
2419 unsigned int PixelPTEBytesPerRowC;
2420 bool PTEBufferSizeNotExceededY;
2421 bool PTEBufferSizeNotExceededC;
2423 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2424 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2425 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2426 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2427 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2429 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2430 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2433 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2436 v->BlockHeight256BytesC[k],
2437 v->BlockWidth256BytesC[k],
2438 v->SourcePixelFormat[k],
2439 v->SurfaceTiling[k],
2440 v->BytePerPixelC[k],
2443 v->ViewportHeightChroma[k],
2446 v->HostVMMaxNonCachedPageTableLevels,
2447 v->GPUVMMinPageSize,
2448 v->HostVMMinPageSize,
2449 v->PTEBufferSizeInRequestsForChroma,
2451 v->DCCMetaPitchC[k],
2452 &v->MacroTileWidthC[k],
2454 &PixelPTEBytesPerRowC,
2455 &PTEBufferSizeNotExceededC,
2456 &v->dpte_row_width_chroma_ub[k],
2457 &v->dpte_row_height_chroma[k],
2458 &v->meta_req_width_chroma[k],
2459 &v->meta_req_height_chroma[k],
2460 &v->meta_row_width_chroma[k],
2461 &v->meta_row_height_chroma[k],
2464 &v->PixelPTEReqWidthC[k],
2465 &v->PixelPTEReqHeightC[k],
2466 &v->PTERequestSizeC[k],
2467 &v->dpde0_bytes_per_frame_ub_c[k],
2468 &v->meta_pte_bytes_per_frame_ub_c[k]);
2470 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2475 v->ProgressiveToInterlaceUnitInOPP,
2477 v->ViewportYStartC[k],
2478 &v->VInitPreFillC[k],
2479 &v->MaxNumSwathC[k]);
2481 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2482 v->PTEBufferSizeInRequestsForChroma = 0;
2483 PixelPTEBytesPerRowC = 0;
2484 PDEAndMetaPTEBytesFrameC = 0;
2486 v->MaxNumSwathC[k] = 0;
2487 v->PrefetchSourceLinesC[k] = 0;
2490 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2493 v->BlockHeight256BytesY[k],
2494 v->BlockWidth256BytesY[k],
2495 v->SourcePixelFormat[k],
2496 v->SurfaceTiling[k],
2497 v->BytePerPixelY[k],
2500 v->ViewportHeight[k],
2503 v->HostVMMaxNonCachedPageTableLevels,
2504 v->GPUVMMinPageSize,
2505 v->HostVMMinPageSize,
2506 v->PTEBufferSizeInRequestsForLuma,
2508 v->DCCMetaPitchY[k],
2509 &v->MacroTileWidthY[k],
2511 &PixelPTEBytesPerRowY,
2512 &PTEBufferSizeNotExceededY,
2513 &v->dpte_row_width_luma_ub[k],
2514 &v->dpte_row_height[k],
2515 &v->meta_req_width[k],
2516 &v->meta_req_height[k],
2517 &v->meta_row_width[k],
2518 &v->meta_row_height[k],
2519 &v->vm_group_bytes[k],
2520 &v->dpte_group_bytes[k],
2521 &v->PixelPTEReqWidthY[k],
2522 &v->PixelPTEReqHeightY[k],
2523 &v->PTERequestSizeY[k],
2524 &v->dpde0_bytes_per_frame_ub_l[k],
2525 &v->meta_pte_bytes_per_frame_ub_l[k]);
2527 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2532 v->ProgressiveToInterlaceUnitInOPP,
2534 v->ViewportYStartY[k],
2535 &v->VInitPreFillY[k],
2536 &v->MaxNumSwathY[k]);
2537 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2538 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2539 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2541 CalculateRowBandwidth(
2543 v->SourcePixelFormat[k],
2547 v->HTotal[k] / v->PixelClock[k],
2550 v->meta_row_height[k],
2551 v->meta_row_height_chroma[k],
2552 PixelPTEBytesPerRowY,
2553 PixelPTEBytesPerRowC,
2554 v->dpte_row_height[k],
2555 v->dpte_row_height_chroma[k],
2557 &v->dpte_row_bw[k]);
2560 v->TotalDCCActiveDPP = 0;
2561 v->TotalActiveDPP = 0;
2562 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2563 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2564 if (v->DCCEnable[k])
2565 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2566 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2567 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2568 NoChromaPlanes = false;
2571 ReorderBytes = v->NumberOfChannels
2573 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2574 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2575 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2577 VMDataOnlyReturnBW = dml_min(
2578 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2579 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2580 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2581 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2583 #ifdef __DML_VBA_DEBUG__
2584 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2585 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2586 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2587 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2588 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2589 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2590 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2591 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2592 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2593 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2594 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2597 if (v->GPUVMEnable && v->HostVMEnable)
2598 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2600 v->UrgentExtraLatency = CalculateExtraLatency(
2601 v->RoundTripPingLatencyCycles,
2605 v->PixelChunkSizeInKByte,
2606 v->TotalDCCActiveDPP,
2611 v->NumberOfActivePlanes,
2613 v->dpte_group_bytes,
2614 HostVMInefficiencyFactor,
2615 v->HostVMMinPageSize,
2616 v->HostVMMaxNonCachedPageTableLevels);
2618 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2620 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2621 if (v->BlendingAndTiming[k] == k) {
2622 if (v->WritebackEnable[k] == true) {
2623 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2624 + CalculateWriteBackDelay(
2625 v->WritebackPixelFormat[k],
2626 v->WritebackHRatio[k],
2627 v->WritebackVRatio[k],
2628 v->WritebackVTaps[k],
2629 v->WritebackDestinationWidth[k],
2630 v->WritebackDestinationHeight[k],
2631 v->WritebackSourceHeight[k],
2632 v->HTotal[k]) / v->DISPCLK;
2634 v->WritebackDelay[v->VoltageLevel][k] = 0;
2635 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2636 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2637 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2638 v->WritebackDelay[v->VoltageLevel][k],
2640 + CalculateWriteBackDelay(
2641 v->WritebackPixelFormat[j],
2642 v->WritebackHRatio[j],
2643 v->WritebackVRatio[j],
2644 v->WritebackVTaps[j],
2645 v->WritebackDestinationWidth[j],
2646 v->WritebackDestinationHeight[j],
2647 v->WritebackSourceHeight[j],
2648 v->HTotal[k]) / v->DISPCLK);
2654 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2655 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2656 if (v->BlendingAndTiming[k] == j)
2657 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2659 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2660 v->MaxVStartupLines[k] =
2661 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2662 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2663 v->VTotal[k] - v->VActive[k]
2667 (double) v->WritebackDelay[v->VoltageLevel][k]
2668 / (v->HTotal[k] / v->PixelClock[k]),
2670 if (v->MaxVStartupLines[k] > 1023)
2671 v->MaxVStartupLines[k] = 1023;
2673 #ifdef __DML_VBA_DEBUG__
2674 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2675 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2676 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2680 v->MaximumMaxVStartupLines = 0;
2681 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2682 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2685 // We don't really care to iterate between the various prefetch modes
2686 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2688 v->UrgentLatency = CalculateUrgentLatency(
2689 v->UrgentLatencyPixelDataOnly,
2690 v->UrgentLatencyPixelMixedWithVMData,
2691 v->UrgentLatencyVMDataOnly,
2692 v->DoUrgentLatencyAdjustment,
2693 v->UrgentLatencyAdjustmentFabricClockComponent,
2694 v->UrgentLatencyAdjustmentFabricClockReference,
2697 v->FractionOfUrgentBandwidth = 0.0;
2698 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2700 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2703 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2704 bool DestinationLineTimesForPrefetchLessThan2 = false;
2705 bool VRatioPrefetchMoreThan4 = false;
2706 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2707 MaxTotalRDBandwidth = 0;
2709 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2711 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2714 myPipe.DPPCLK = v->DPPCLK[k];
2715 myPipe.DISPCLK = v->DISPCLK;
2716 myPipe.PixelClock = v->PixelClock[k];
2717 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2718 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2719 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2720 myPipe.SourceScan = v->SourceScan[k];
2721 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2722 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2723 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2724 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2725 myPipe.InterlaceEnable = v->Interlace[k];
2726 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2727 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2728 myPipe.HTotal = v->HTotal[k];
2729 myPipe.DCCEnable = v->DCCEnable[k];
2730 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2731 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2732 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2733 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2734 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2735 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2736 v->ErrorResult[k] = CalculatePrefetchSchedule(
2738 HostVMInefficiencyFactor,
2741 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2743 v->DPPCLKDelaySCLLBOnly,
2744 v->DPPCLKDelayCNVCCursor,
2745 v->DISPCLKDelaySubtotal,
2746 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2748 v->MaxInterDCNTileRepeaters,
2749 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2750 v->MaxVStartupLines[k],
2751 v->GPUVMMaxPageTableLevels,
2754 v->HostVMMaxNonCachedPageTableLevels,
2755 v->HostVMMinPageSize,
2756 v->DynamicMetadataEnable[k],
2757 v->DynamicMetadataVMEnabled,
2758 v->DynamicMetadataLinesBeforeActiveRequired[k],
2759 v->DynamicMetadataTransmittedBytes[k],
2761 v->UrgentExtraLatency,
2763 v->PDEAndMetaPTEBytesFrame[k],
2765 v->PixelPTEBytesPerRow[k],
2766 v->PrefetchSourceLinesY[k],
2768 v->VInitPreFillY[k],
2770 v->PrefetchSourceLinesC[k],
2772 v->VInitPreFillC[k],
2774 v->swath_width_luma_ub[k],
2775 v->swath_width_chroma_ub[k],
2779 &v->DSTXAfterScaler[k],
2780 &v->DSTYAfterScaler[k],
2781 &v->DestinationLinesForPrefetch[k],
2782 &v->PrefetchBandwidth[k],
2783 &v->DestinationLinesToRequestVMInVBlank[k],
2784 &v->DestinationLinesToRequestRowInVBlank[k],
2785 &v->VRatioPrefetchY[k],
2786 &v->VRatioPrefetchC[k],
2787 &v->RequiredPrefetchPixDataBWLuma[k],
2788 &v->RequiredPrefetchPixDataBWChroma[k],
2789 &v->NotEnoughTimeForDynamicMetadata[k],
2791 &v->prefetch_vmrow_bw[k],
2795 &v->VUpdateOffsetPix[k],
2796 &v->VUpdateWidthPix[k],
2797 &v->VReadyOffsetPix[k]);
2799 #ifdef __DML_VBA_DEBUG__
2800 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2802 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2805 v->NoEnoughUrgentLatencyHiding = false;
2806 v->NoEnoughUrgentLatencyHidingPre = false;
2808 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2809 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2810 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2811 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2812 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2814 CalculateUrgentBurstFactor(
2815 v->swath_width_luma_ub[k],
2816 v->swath_width_chroma_ub[k],
2819 v->HTotal[k] / v->PixelClock[k],
2821 v->CursorBufferSize,
2822 v->CursorWidth[k][0],
2826 v->BytePerPixelDETY[k],
2827 v->BytePerPixelDETC[k],
2828 v->DETBufferSizeY[k],
2829 v->DETBufferSizeC[k],
2830 &v->UrgBurstFactorCursor[k],
2831 &v->UrgBurstFactorLuma[k],
2832 &v->UrgBurstFactorChroma[k],
2833 &v->NoUrgentLatencyHiding[k]);
2835 CalculateUrgentBurstFactor(
2836 v->swath_width_luma_ub[k],
2837 v->swath_width_chroma_ub[k],
2840 v->HTotal[k] / v->PixelClock[k],
2842 v->CursorBufferSize,
2843 v->CursorWidth[k][0],
2845 v->VRatioPrefetchY[k],
2846 v->VRatioPrefetchC[k],
2847 v->BytePerPixelDETY[k],
2848 v->BytePerPixelDETC[k],
2849 v->DETBufferSizeY[k],
2850 v->DETBufferSizeC[k],
2851 &v->UrgBurstFactorCursorPre[k],
2852 &v->UrgBurstFactorLumaPre[k],
2853 &v->UrgBurstFactorChromaPre[k],
2854 &v->NoUrgentLatencyHidingPre[k]);
2856 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2859 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2860 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2861 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2862 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2868 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2871 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2872 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2873 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2874 + v->cursor_bw_pre[k]);
2876 #ifdef __DML_VBA_DEBUG__
2877 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2878 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2879 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2880 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2881 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2883 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2884 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2886 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2888 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2889 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2890 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2891 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2893 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2894 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2895 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2898 if (v->DestinationLinesForPrefetch[k] < 2)
2899 DestinationLineTimesForPrefetchLessThan2 = true;
2901 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2902 VRatioPrefetchMoreThan4 = true;
2904 if (v->NoUrgentLatencyHiding[k] == true)
2905 v->NoEnoughUrgentLatencyHiding = true;
2907 if (v->NoUrgentLatencyHidingPre[k] == true)
2908 v->NoEnoughUrgentLatencyHidingPre = true;
2911 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2913 #ifdef __DML_VBA_DEBUG__
2914 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2915 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2916 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2919 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2920 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2921 v->PrefetchModeSupported = true;
2923 v->PrefetchModeSupported = false;
2924 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2925 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2926 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2927 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2931 // This error result check was done after the PrefetchModeSupported. So we will
2932 // still try to calculate flip schedule even prefetch mode not supported
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2935 v->PrefetchModeSupported = false;
2936 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2940 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2941 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2942 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2943 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2945 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2946 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2947 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2949 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2950 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2951 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2954 v->TotImmediateFlipBytes = 0;
2955 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2956 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2957 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2959 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2960 CalculateFlipSchedule(
2962 HostVMInefficiencyFactor,
2963 v->UrgentExtraLatency,
2965 v->GPUVMMaxPageTableLevels,
2967 v->HostVMMaxNonCachedPageTableLevels,
2969 v->HostVMMinPageSize,
2970 v->PDEAndMetaPTEBytesFrame[k],
2972 v->PixelPTEBytesPerRow[k],
2973 v->BandwidthAvailableForImmediateFlip,
2974 v->TotImmediateFlipBytes,
2975 v->SourcePixelFormat[k],
2976 v->HTotal[k] / v->PixelClock[k],
2981 v->dpte_row_height[k],
2982 v->meta_row_height[k],
2983 v->dpte_row_height_chroma[k],
2984 v->meta_row_height_chroma[k],
2985 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2986 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2987 &v->final_flip_bw[k],
2988 &v->ImmediateFlipSupportedForPipe[k]);
2991 v->total_dcn_read_bw_with_flip = 0.0;
2992 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2994 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2996 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2997 v->DPPPerPlane[k] * v->final_flip_bw[k]
2998 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2999 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
3000 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
3002 * (v->final_flip_bw[k]
3003 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
3004 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
3005 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
3006 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
3008 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
3009 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
3010 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
3012 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
3013 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
3015 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
3017 v->ImmediateFlipSupported = true;
3018 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
3019 #ifdef __DML_VBA_DEBUG__
3020 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
3022 v->ImmediateFlipSupported = false;
3023 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
3025 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3026 if (v->ImmediateFlipSupportedForPipe[k] == false) {
3027 #ifdef __DML_VBA_DEBUG__
3028 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
3031 v->ImmediateFlipSupported = false;
3035 v->ImmediateFlipSupported = false;
3038 v->PrefetchAndImmediateFlipSupported =
3039 (v->PrefetchModeSupported == true
3040 && ((!v->ImmediateFlipSupport && !v->HostVMEnable
3041 && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ?
3043 #ifdef __DML_VBA_DEBUG__
3044 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
3045 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3046 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3047 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3048 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3049 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3051 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3053 v->VStartupLines = v->VStartupLines + 1;
3054 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3055 ASSERT(v->PrefetchAndImmediateFlipSupported);
3057 // Unbounded Request Enabled
3058 CalculateUnboundedRequestAndCompressedBufferSize(
3059 v->DETBufferSizeInKByte[0],
3060 v->ConfigReturnBufferSizeInKByte,
3061 v->UseUnboundedRequesting,
3065 v->CompressedBufferSegmentSizeInkByte,
3067 &v->UnboundedRequestEnabled,
3068 &v->CompressedBufferSizeInkByte);
3070 //Watermarks and NB P-State/DRAM Clock Change Support
3072 enum clock_change_support DRAMClockChangeSupport; // dummy
3073 CalculateWatermarksAndDRAMSpeedChangeSupport(
3076 v->NumberOfActivePlanes,
3077 v->MaxLineBufferLines,
3079 v->WritebackInterfaceBufferSize,
3082 v->SynchronizedVBlank,
3083 v->dpte_group_bytes,
3086 v->UrgentExtraLatency,
3087 v->WritebackLatency,
3088 v->WritebackChunkSize,
3090 v->DRAMClockChangeLatency,
3092 v->SREnterPlusExitTime,
3094 v->SREnterPlusExitZ8Time,
3111 v->BlendingAndTiming,
3113 v->BytePerPixelDETY,
3114 v->BytePerPixelDETC,
3118 v->WritebackPixelFormat,
3119 v->WritebackDestinationWidth,
3120 v->WritebackDestinationHeight,
3121 v->WritebackSourceHeight,
3122 v->UnboundedRequestEnabled,
3123 v->CompressedBufferSizeInkByte,
3124 &DRAMClockChangeSupport,
3125 &v->UrgentWatermark,
3126 &v->WritebackUrgentWatermark,
3127 &v->DRAMClockChangeWatermark,
3128 &v->WritebackDRAMClockChangeWatermark,
3129 &v->StutterExitWatermark,
3130 &v->StutterEnterPlusExitWatermark,
3131 &v->Z8StutterExitWatermark,
3132 &v->Z8StutterEnterPlusExitWatermark,
3133 &v->MinActiveDRAMClockChangeLatencySupported);
3135 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3136 if (v->WritebackEnable[k] == true) {
3137 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3139 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3141 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3146 //Display Pipeline Delivery Time in Prefetch, Groups
3147 CalculatePixelDeliveryTimes(
3148 v->NumberOfActivePlanes,
3153 v->swath_width_luma_ub,
3154 v->swath_width_chroma_ub,
3159 v->PSCL_THROUGHPUT_LUMA,
3160 v->PSCL_THROUGHPUT_CHROMA,
3167 v->BlockWidth256BytesY,
3168 v->BlockHeight256BytesY,
3169 v->BlockWidth256BytesC,
3170 v->BlockHeight256BytesC,
3171 v->DisplayPipeLineDeliveryTimeLuma,
3172 v->DisplayPipeLineDeliveryTimeChroma,
3173 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3174 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3175 v->DisplayPipeRequestDeliveryTimeLuma,
3176 v->DisplayPipeRequestDeliveryTimeChroma,
3177 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3178 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3179 v->CursorRequestDeliveryTime,
3180 v->CursorRequestDeliveryTimePrefetch);
3182 CalculateMetaAndPTETimes(
3183 v->NumberOfActivePlanes,
3186 v->MinMetaChunkSizeBytes,
3190 v->DestinationLinesToRequestRowInVBlank,
3191 v->DestinationLinesToRequestRowInImmediateFlip,
3198 v->dpte_row_height_chroma,
3200 v->meta_row_width_chroma,
3202 v->meta_row_height_chroma,
3204 v->meta_req_width_chroma,
3206 v->meta_req_height_chroma,
3207 v->dpte_group_bytes,
3210 v->PixelPTEReqWidthY,
3211 v->PixelPTEReqHeightY,
3212 v->PixelPTEReqWidthC,
3213 v->PixelPTEReqHeightC,
3214 v->dpte_row_width_luma_ub,
3215 v->dpte_row_width_chroma_ub,
3216 v->DST_Y_PER_PTE_ROW_NOM_L,
3217 v->DST_Y_PER_PTE_ROW_NOM_C,
3218 v->DST_Y_PER_META_ROW_NOM_L,
3219 v->DST_Y_PER_META_ROW_NOM_C,
3220 v->TimePerMetaChunkNominal,
3221 v->TimePerChromaMetaChunkNominal,
3222 v->TimePerMetaChunkVBlank,
3223 v->TimePerChromaMetaChunkVBlank,
3224 v->TimePerMetaChunkFlip,
3225 v->TimePerChromaMetaChunkFlip,
3226 v->time_per_pte_group_nom_luma,
3227 v->time_per_pte_group_vblank_luma,
3228 v->time_per_pte_group_flip_luma,
3229 v->time_per_pte_group_nom_chroma,
3230 v->time_per_pte_group_vblank_chroma,
3231 v->time_per_pte_group_flip_chroma);
3233 CalculateVMGroupAndRequestTimes(
3234 v->NumberOfActivePlanes,
3236 v->GPUVMMaxPageTableLevels,
3239 v->DestinationLinesToRequestVMInVBlank,
3240 v->DestinationLinesToRequestVMInImmediateFlip,
3243 v->dpte_row_width_luma_ub,
3244 v->dpte_row_width_chroma_ub,
3246 v->dpde0_bytes_per_frame_ub_l,
3247 v->dpde0_bytes_per_frame_ub_c,
3248 v->meta_pte_bytes_per_frame_ub_l,
3249 v->meta_pte_bytes_per_frame_ub_c,
3250 v->TimePerVMGroupVBlank,
3251 v->TimePerVMGroupFlip,
3252 v->TimePerVMRequestVBlank,
3253 v->TimePerVMRequestFlip);
3256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3257 if (PrefetchMode == 0) {
3258 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3259 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3260 v->MinTTUVBlank[k] = dml_max(
3261 v->DRAMClockChangeWatermark,
3262 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3263 } else if (PrefetchMode == 1) {
3264 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3265 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3266 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3268 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3269 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3270 v->MinTTUVBlank[k] = v->UrgentWatermark;
3272 if (!v->DynamicMetadataEnable[k])
3273 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3276 // DCC Configuration
3278 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3279 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3280 v->SourcePixelFormat[k],
3281 v->SurfaceWidthY[k],
3282 v->SurfaceWidthC[k],
3283 v->SurfaceHeightY[k],
3284 v->SurfaceHeightC[k],
3285 v->DETBufferSizeInKByte[0] * 1024,
3286 v->BlockHeight256BytesY[k],
3287 v->BlockHeight256BytesC[k],
3288 v->SurfaceTiling[k],
3289 v->BytePerPixelY[k],
3290 v->BytePerPixelC[k],
3291 v->BytePerPixelDETY[k],
3292 v->BytePerPixelDETC[k],
3294 &v->DCCYMaxUncompressedBlock[k],
3295 &v->DCCCMaxUncompressedBlock[k],
3296 &v->DCCYMaxCompressedBlock[k],
3297 &v->DCCCMaxCompressedBlock[k],
3298 &v->DCCYIndependentBlock[k],
3299 &v->DCCCIndependentBlock[k]);
3302 // VStartup Adjustment
3303 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3304 bool isInterlaceTiming;
3305 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3306 #ifdef __DML_VBA_DEBUG__
3307 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3310 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3312 #ifdef __DML_VBA_DEBUG__
3313 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3314 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3315 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3316 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3319 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3320 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3321 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3324 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3326 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3327 - v->VFrontPorch[k])
3328 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3329 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3331 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3333 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3334 <= (isInterlaceTiming ?
3335 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3336 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3337 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3339 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3341 #ifdef __DML_VBA_DEBUG__
3342 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3343 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3344 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3345 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3346 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3347 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3348 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3349 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3350 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3351 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3352 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3357 //Maximum Bandwidth Used
3358 double TotalWRBandwidth = 0;
3359 double MaxPerPlaneVActiveWRBandwidth = 0;
3360 double WRBandwidth = 0;
3361 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3362 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3363 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3364 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3365 } else if (v->WritebackEnable[k] == true) {
3366 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3367 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3369 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3370 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3373 v->TotalDataReadBandwidth = 0;
3374 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3375 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3378 // Stutter Efficiency
3379 CalculateStutterEfficiency(
3381 v->CompressedBufferSizeInkByte,
3382 v->UnboundedRequestEnabled,
3383 v->ConfigReturnBufferSizeInKByte,
3384 v->MetaFIFOSizeInKEntries,
3385 v->ZeroSizeBufferEntries,
3386 v->NumberOfActivePlanes,
3387 v->ROBBufferSizeInKByte,
3388 v->TotalDataReadBandwidth,
3391 v->COMPBUF_RESERVED_SPACE_64B,
3392 v->COMPBUF_RESERVED_SPACE_ZS,
3395 v->SynchronizedVBlank,
3396 v->StutterEnterPlusExitWatermark,
3397 v->Z8StutterEnterPlusExitWatermark,
3398 v->ProgressiveToInterlaceUnitInOPP,
3404 v->BytePerPixelDETY,
3410 v->DCCFractionOfZeroSizeRequestsLuma,
3411 v->DCCFractionOfZeroSizeRequestsChroma,
3417 v->BlockHeight256BytesY,
3418 v->BlockWidth256BytesY,
3419 v->BlockHeight256BytesC,
3420 v->BlockWidth256BytesC,
3421 v->DCCYMaxUncompressedBlock,
3422 v->DCCCMaxUncompressedBlock,
3426 v->ReadBandwidthPlaneLuma,
3427 v->ReadBandwidthPlaneChroma,
3430 &v->StutterEfficiencyNotIncludingVBlank,
3431 &v->StutterEfficiency,
3432 &v->NumberOfStutterBurstsPerFrame,
3433 &v->Z8StutterEfficiencyNotIncludingVBlank,
3434 &v->Z8StutterEfficiency,
3435 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - derive per-plane 256-byte block dimensions and
 * pass them on to the swath/DET configuration step.
 *
 * For every active plane (k < v->NumberOfActivePlanes) this calls
 * CalculateBytePerPixelAnd256BBlockSizes() with the plane's SourcePixelFormat
 * and SurfaceTiling and stores the block height/width results in the local
 * Read256Bytes* arrays. Those arrays are then handed to
 * CalculateSwathAndDETConfiguration() together with state read from
 * mode_lib->vba.
 *
 * NOTE(review): only part of both argument lists is shown here, so the role
 * of the BytePerPix* and dummy* locals cannot be confirmed from this listing.
 */
3439 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3441 struct vba_vars_st *v = &mode_lib->vba;
3442 // Display Pipe Configuration
// Per-plane scratch arrays, one entry per DPP (DC__NUM_DPP__MAX entries each).
3443 double BytePerPixDETY[DC__NUM_DPP__MAX];
3444 double BytePerPixDETC[DC__NUM_DPP__MAX];
3445 int BytePerPixY[DC__NUM_DPP__MAX];
3446 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these four arrays are int, while the callee declares the
// matching parameters as unsigned int * - confirm the signedness mismatch is intended.
3447 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3448 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3449 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3450 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// Locals named dummy*; presumably sinks for outputs this caller does not use - TODO confirm.
3451 double dummy1[DC__NUM_DPP__MAX];
3452 double dummy2[DC__NUM_DPP__MAX];
3453 double dummy3[DC__NUM_DPP__MAX];
3454 double dummy4[DC__NUM_DPP__MAX];
3455 int dummy5[DC__NUM_DPP__MAX];
3456 int dummy6[DC__NUM_DPP__MAX];
3457 bool dummy7[DC__NUM_DPP__MAX];
3458 bool dummysinglestring;
// Pass 1: per-plane block sizes from pixel format and tiling.
3462 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3464 CalculateBytePerPixelAnd256BBlockSizes(
3465 v->SourcePixelFormat[k],
3466 v->SurfaceTiling[k],
3471 &Read256BytesBlockHeightY[k],
3472 &Read256BytesBlockHeightC[k],
3473 &Read256BytesBlockWidthY[k],
3474 &Read256BytesBlockWidthC[k]);
// Pass 2: one call covering all planes, fed with the arrays filled above.
3477 CalculateSwathAndDETConfiguration(
3479 v->NumberOfActivePlanes,
3480 v->DETBufferSizeInKByte[0],
3484 v->SourcePixelFormat,
3492 Read256BytesBlockHeightY,
3493 Read256BytesBlockHeightC,
3494 Read256BytesBlockWidthY,
3495 Read256BytesBlockWidthC,
3496 v->ODMCombineEnabled,
3497 v->BlendingAndTiming,
3515 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - map a source pixel format and
 * surface tiling to bytes-per-pixel values and 256-byte block dimensions.
 *
 * Outputs written through the pointer parameters:
 *   BytePerPixelDETY / BytePerPixelDETC
 *       dm_444_64 -> 8 / 0, dm_444_32 or dm_rgbe -> 4 / 0, dm_444_16 -> 2 / 0,
 *       dm_444_8 -> 1 / 0, dm_rgbe_alpha -> 4 / 1, dm_420_8 -> 1 / 2,
 *       dm_420_12 -> 2 / 4, last case -> 4.0/3 / 8.0/3.
 *   BlockHeight256BytesY / BlockHeight256BytesC
 *       chosen from SurfaceTiling and SourcePixelFormat (see below).
 *   BlockWidth256BytesY / BlockWidth256BytesC
 *       256 / bytes-per-pixel / block height, in unsigned integer arithmetic;
 *       both chroma values are 0 for the formats in the first branch.
 *
 * NOTE(review): the stores to *BytePerPixelY and *BytePerPixelC and the return
 * statement are not visible in this listing; the width computations below
 * read *BytePerPixelY / *BytePerPixelC, so those stores must come first.
 */
3518 static bool CalculateBytePerPixelAnd256BBlockSizes(
3519 enum source_format_class SourcePixelFormat,
3520 enum dm_swizzle_mode SurfaceTiling,
3521 unsigned int *BytePerPixelY,
3522 unsigned int *BytePerPixelC,
3523 double *BytePerPixelDETY,
3524 double *BytePerPixelDETC,
3525 unsigned int *BlockHeight256BytesY,
3526 unsigned int *BlockHeight256BytesC,
3527 unsigned int *BlockWidth256BytesY,
3528 unsigned int *BlockWidth256BytesC)
// Step 1: bytes per pixel, selected by pixel format.
3530 if (SourcePixelFormat == dm_444_64) {
3531 *BytePerPixelDETY = 8;
3532 *BytePerPixelDETC = 0;
3535 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3536 *BytePerPixelDETY = 4;
3537 *BytePerPixelDETC = 0;
3540 } else if (SourcePixelFormat == dm_444_16) {
3541 *BytePerPixelDETY = 2;
3542 *BytePerPixelDETC = 0;
3545 } else if (SourcePixelFormat == dm_444_8) {
3546 *BytePerPixelDETY = 1;
3547 *BytePerPixelDETC = 0;
3550 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3551 *BytePerPixelDETY = 4;
3552 *BytePerPixelDETC = 1;
3555 } else if (SourcePixelFormat == dm_420_8) {
3556 *BytePerPixelDETY = 1;
3557 *BytePerPixelDETC = 2;
3560 } else if (SourcePixelFormat == dm_420_12) {
3561 *BytePerPixelDETY = 2;
3562 *BytePerPixelDETC = 4;
// Remaining formats: fractional DET sizes (4/3 and 8/3 bytes per pixel).
3566 *BytePerPixelDETY = 4.0 / 3;
3567 *BytePerPixelDETC = 8.0 / 3;
// Step 2: 256-byte block dimensions.
// First branch: the formats listed here get a luma block only; chroma block
// height and width are set to 0.
3572 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3573 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
// Linear tiling uses a block height of 1; otherwise 4 (dm_444_64),
// 16 (dm_444_8) or 8.
3574 if (SurfaceTiling == dm_sw_linear) {
3575 *BlockHeight256BytesY = 1;
3576 } else if (SourcePixelFormat == dm_444_64) {
3577 *BlockHeight256BytesY = 4;
3578 } else if (SourcePixelFormat == dm_444_8) {
3579 *BlockHeight256BytesY = 16;
3581 *BlockHeight256BytesY = 8;
3583 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3584 *BlockHeight256BytesC = 0;
3585 *BlockWidth256BytesC = 0;
// Second branch: both luma and chroma block sizes are produced.
// Heights (Y/C): linear 1/1, dm_rgbe_alpha 8/16, dm_420_8 16/8, otherwise 8/8.
3587 if (SurfaceTiling == dm_sw_linear) {
3588 *BlockHeight256BytesY = 1;
3589 *BlockHeight256BytesC = 1;
3590 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3591 *BlockHeight256BytesY = 8;
3592 *BlockHeight256BytesC = 16;
3593 } else if (SourcePixelFormat == dm_420_8) {
3594 *BlockHeight256BytesY = 16;
3595 *BlockHeight256BytesC = 8;
3597 *BlockHeight256BytesY = 8;
3598 *BlockHeight256BytesC = 8;
3600 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3601 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick the latency value that corresponds to PrefetchMode.
 *
 *   PrefetchMode 0: dml_max(DRAMClockChangeLatency + UrgentLatency,
 *                           dml_max(SREnterPlusExitTime, UrgentLatency))
 *   PrefetchMode 1: dml_max(SREnterPlusExitTime, UrgentLatency)
 *   anything else : UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	double sr_or_urgent;

	/* Modes above 1 only wait for the urgent latency. */
	if (PrefetchMode > 1)
		return UrgentLatency;

	/* Modes 0 and 1 both need the larger of self-refresh time and urgent latency. */
	sr_or_urgent = dml_max(SREnterPlusExitTime, UrgentLatency);
	if (PrefetchMode == 1)
		return sr_or_urgent;

	/* Mode 0 additionally considers the DRAM clock change latency. */
	return dml_max(DRAMClockChangeLatency + UrgentLatency, sr_or_urgent);
}
/*
 * dml31_CalculateWriteBackDISPCLK - largest of three DISPCLK candidates for
 * a writeback configuration.
 *
 *   DISPCLK_H  = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio
 *   DISPCLK_V  = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal
 *   DISPCLK_HB = PixelClock * WritebackVTaps
 *                * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0)
 *                / 6.0 / WritebackSourceWidth
 *
 * Returns dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB).
 *
 * WritebackPixelFormat and WritebackVRatio are not referenced by the
 * statements shown here.
 * NOTE(review): the declaration of the PixelClock parameter is not visible in
 * this listing.
 */
3617 double dml31_CalculateWriteBackDISPCLK(
3618 enum source_format_class WritebackPixelFormat,
3620 double WritebackHRatio,
3621 double WritebackVRatio,
3622 unsigned int WritebackHTaps,
3623 unsigned int WritebackVTaps,
3624 long WritebackSourceWidth,
3625 long WritebackDestinationWidth,
3626 unsigned int HTotal,
3627 unsigned int WritebackLineBufferSize)
3629 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// Candidate driven by the horizontal tap count and horizontal ratio.
3631 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
// Candidate driven by the vertical tap count and destination width, per HTotal.
3632 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
// Candidate that also accounts for the line buffer size and source width.
3633 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3634 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - writeback delay derived from the writeback
 * scaling parameters.
 *
 *   WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2
 *   Line_length    = dml_max(WritebackDestinationWidth,
 *                            dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps)
 *   Output_lines_last_notclamped =
 *       WritebackDestinationHeight - 1
 *       - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1)
 *
 * Returns 0 when Output_lines_last_notclamped is negative, otherwise
 *   Output_lines_last_notclamped * Line_length
 *   + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not referenced by the
 * statements shown here.
 * NOTE(review): the declaration of Line_length is not visible in this listing.
 */
3637 static double CalculateWriteBackDelay(
3638 enum source_format_class WritebackPixelFormat,
3639 double WritebackHRatio,
3640 double WritebackVRatio,
3641 unsigned int WritebackVTaps,
3642 int WritebackDestinationWidth,
3643 int WritebackDestinationHeight,
3644 int WritebackSourceHeight,
3645 unsigned int HTotal)
// Local that carries the return value (same name as the function).
3647 double CalculateWriteBackDelay;
3649 double Output_lines_last_notclamped;
3650 double WritebackVInit;
3652 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3653 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3654 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
// A negative line count is clamped: the delay is reported as 0.
3655 if (Output_lines_last_notclamped < 0) {
3656 CalculateWriteBackDelay = 0;
// NOTE(review): HTotal is unsigned int and WritebackDestinationWidth is int,
// so (HTotal - WritebackDestinationWidth) is evaluated in unsigned arithmetic.
3658 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3660 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel offsets
 * and dynamic-metadata timing values.
 *
 * Values written through the output pointers:
 *   *VUpdateWidthPix  = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK
 *                                 + TotalRepeaterDelayTime) * PixelClock, 1.0)
 *   *VReadyOffsetPix  = dml_ceil(dml_max(150.0 / DPPCLK,
 *                                 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep
 *                                 + 10.0 / DPPCLK) * PixelClock, 1.0)
 *   *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1)
 *   *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock
 *   *Tdmbf  = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK
 *   *Tdmec  = HTotal / PixelClock
 *   *Tdmsks = VBlank * HTotal / PixelClock / 2.0 when
 *             DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *             DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *             halved when InterlaceEnable == 1 and
 *             ProgressiveToInterlaceUnitInOPP is false.
 * where TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 *
 * NOTE(review): the declarations of DPPCLK, DISPCLK, PixelClock, HTotal,
 * VBlank, TSetup, Tdmbf, Tdmec and Tdmsks are not visible in this listing.
 */
3663 static void CalculateVupdateAndDynamicMetadataParameters(
3664 int MaxInterDCNTileRepeaters,
3667 double DCFClkDeepSleep,
3671 int DynamicMetadataTransmittedBytes,
3672 int DynamicMetadataLinesBeforeActiveRequired,
3673 int InterlaceEnable,
3674 bool ProgressiveToInterlaceUnitInOPP,
3679 int *VUpdateOffsetPix,
3680 double *VUpdateWidthPix,
3681 double *VReadyOffsetPix)
3683 double TotalRepeaterDelayTime;
3685 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3686 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3687 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
3688 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3689 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3690 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3691 *Tdmec = HTotal / PixelClock;
3692 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3693 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3695 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3697 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3698 *Tdmsks = *Tdmsks / 2;
3700 #ifdef __DML_VBA_DEBUG__
// *VUpdateWidthPix and *VReadyOffsetPix are doubles, so they are printed with
// %f; only *VUpdateOffsetPix is an int and keeps %d.
3701 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3702 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3703 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - meta-row and DPTE-row bandwidth.
 *
 * With DCCEnable set:
 *   *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
 *   plus, for dm_420_8 / dm_420_10 / dm_420_12 / dm_rgbe_alpha,
 *   VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime).
 *
 * With GPUVMEnable set:
 *   *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
 *   plus, for the same four formats,
 *   VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime).
 *
 * NOTE(review): the declarations of GPUVMEnable, DCCEnable, VRatio and
 * LineTime, and the statements executed when DCCEnable / GPUVMEnable are not
 * true, are not visible in this listing.
 */
3707 static void CalculateRowBandwidth(
3709 enum source_format_class SourcePixelFormat,
3711 double VRatioChroma,
3714 unsigned int MetaRowByteLuma,
3715 unsigned int MetaRowByteChroma,
3716 unsigned int meta_row_height_luma,
3717 unsigned int meta_row_height_chroma,
3718 unsigned int PixelPTEBytesPerRowLuma,
3719 unsigned int PixelPTEBytesPerRowChroma,
3720 unsigned int dpte_row_height_luma,
3721 unsigned int dpte_row_height_chroma,
3722 double *meta_row_bw,
3723 double *dpte_row_bw)
// Meta-row bandwidth: luma + chroma for the two-plane formats, luma only otherwise.
3725 if (DCCEnable != true) {
3727 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3728 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3730 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// DPTE-row bandwidth: same structure, keyed on GPUVMEnable.
3733 if (GPUVMEnable != true) {
3735 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3736 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3737 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3739 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip fetch times, required flip bandwidth
 * and per-pipe immediate-flip support.
 *
 * Steps, as shown by the statements below:
 *   1. HostVMDynamicLevelsTrips is HostVMMaxNonCachedPageTableLevels when both
 *      GPUVMEnable and HostVMEnable are set, otherwise 0.
 *   2. ImmediateFlipBW is BandwidthAvailableForImmediateFlip scaled by
 *      (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
 *      / TotImmediateFlipBytes; it is only assigned when GPUVMEnable or
 *      DCCEnable is set, and only read under those same conditions.
 *   3. TimeForFetchingMetaPTEImmediateFlip (GPUVM only) and
 *      TimeForFetchingRowInVBlankImmediateFlip (GPUVM or DCC) are taken from
 *      dml_max3(); each is 0 when its feature condition is false.
 *   4. *DestinationLinesToRequestVMInImmediateFlip and
 *      *DestinationLinesToRequestRowInImmediateFlip are those times divided by
 *      LineTime, computed as dml_ceil(4.0 * x, 1) / 4.0.
 *   5. *final_flip_bw is derived from the byte counts and the destination
 *      line counts from step 4.
 *   6. min_row_time is the smallest of the applicable dpte/meta row heights
 *      times LineTime divided by the matching VRatio / VRatioChroma.
 *   7. *ImmediateFlipSupportedForPipe is false when the VM line count is >= 32,
 *      the row line count is >= 16, or
 *      TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
 *      exceeds min_row_time; otherwise true.
 *
 * mode_lib and HostVMMinPageSize are not referenced by the statements shown.
 * NOTE(review): the declarations of HostVMEnable, GPUVMEnable, LineTime,
 * VRatio, Tno_bw and DCCEnable, the third dml_max3() arguments and the final
 * else-branch of the *final_flip_bw selection are not visible in this listing.
 */
3743 static void CalculateFlipSchedule(
3744 struct display_mode_lib *mode_lib,
3745 double HostVMInefficiencyFactor,
3746 double UrgentExtraLatency,
3747 double UrgentLatency,
3748 unsigned int GPUVMMaxPageTableLevels,
3750 unsigned int HostVMMaxNonCachedPageTableLevels,
3752 double HostVMMinPageSize,
3753 double PDEAndMetaPTEBytesPerFrame,
3754 double MetaRowBytes,
3755 double DPTEBytesPerRow,
3756 double BandwidthAvailableForImmediateFlip,
3757 unsigned int TotImmediateFlipBytes,
3758 enum source_format_class SourcePixelFormat,
3761 double VRatioChroma,
3764 unsigned int dpte_row_height,
3765 unsigned int meta_row_height,
3766 unsigned int dpte_row_height_chroma,
3767 unsigned int meta_row_height_chroma,
3768 double *DestinationLinesToRequestVMInImmediateFlip,
3769 double *DestinationLinesToRequestRowInImmediateFlip,
3770 double *final_flip_bw,
3771 bool *ImmediateFlipSupportedForPipe)
3773 double min_row_time = 0.0;
3774 unsigned int HostVMDynamicLevelsTrips;
3775 double TimeForFetchingMetaPTEImmediateFlip;
3776 double TimeForFetchingRowInVBlankImmediateFlip;
3777 double ImmediateFlipBW;
// Step 1: extra page-table trips only apply when both GPUVM and HostVM are on.
3779 if (GPUVMEnable == true && HostVMEnable == true) {
3780 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3782 HostVMDynamicLevelsTrips = 0;
// Step 2: this call's share of the immediate-flip bandwidth, proportional to its byte count.
3785 if (GPUVMEnable == true || DCCEnable == true) {
3786 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Step 3a: time to fetch PDEs / meta PTEs (GPUVM only).
3789 if (GPUVMEnable == true) {
3790 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3791 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3792 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3795 TimeForFetchingMetaPTEImmediateFlip = 0;
// Step 4a: convert to destination lines, in quarter-line steps.
3798 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Step 3b: time to fetch one meta row + DPTE row (GPUVM or DCC).
3799 if ((GPUVMEnable == true || DCCEnable == true)) {
3800 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3801 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3802 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3805 TimeForFetchingRowInVBlankImmediateFlip = 0;
// Step 4b: convert to destination lines, in quarter-line steps.
3808 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Step 5: bandwidth needed to meet the line counts computed above.
3810 if (GPUVMEnable == true) {
3811 *final_flip_bw = dml_max(
3812 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3813 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
// GPUVMEnable is already known to be false here, so this branch is taken for DCC alone.
3814 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3815 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// Step 6: shortest row time among the row types that are actually fetched.
// NOTE(review): this format test omits dm_420_12, which CalculateRowBandwidth()
// includes in its equivalent test - confirm this is intended.
3820 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3821 if (GPUVMEnable == true && DCCEnable != true) {
3822 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3823 } else if (GPUVMEnable != true && DCCEnable == true) {
3824 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3826 min_row_time = dml_min4(
3827 dpte_row_height * LineTime / VRatio,
3828 meta_row_height * LineTime / VRatio,
3829 dpte_row_height_chroma * LineTime / VRatioChroma,
3830 meta_row_height_chroma * LineTime / VRatioChroma);
// Other formats: luma row heights only.
3833 if (GPUVMEnable == true && DCCEnable != true) {
3834 min_row_time = dpte_row_height * LineTime / VRatio;
3835 } else if (GPUVMEnable != true && DCCEnable == true) {
3836 min_row_time = meta_row_height * LineTime / VRatio;
3838 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Step 7: line-count limits (32 for VM, 16 for rows) and the row-time budget.
3842 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3843 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3844 *ImmediateFlipSupportedForPipe = false;
3846 *ImmediateFlipSupportedForPipe = true;
3849 #ifdef __DML_VBA_DEBUG__
3850 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3851 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3852 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3853 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3854 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3855 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
/*
 * TruncToValidBPP - validate a requested output bits-per-pixel value, or
 * pick one when DesiredBPP is 0, against the link bandwidth limit
 * (MaxLinkBPP) and the per-format DSC / non-DSC limits selected below.
 *
 * NOTE(review): this listing elides several original lines (some
 * parameters, the local declarations, the NonDSCBPPx / MinDSCBPP
 * assignments and most return statements); the comments here describe only
 * the statements that are visible.
 */
3860 static double TruncToValidBPP(
3868 enum output_encoder_class Output,
3869 enum output_format_class Format,
3870 unsigned int DSCInputBitPerComponent,
3874 enum odm_combine_mode ODMCombine)
3883 if (Format == dm_420) {
/*
 * Was "1 / 16": integer division evaluates to 0, so the 1/16 bpp margin
 * was silently dropped for 4:2:0 only. Use a floating-point constant like
 * every other format branch below.
 */
3888 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3889 } else if (Format == dm_444) {
3894 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3896 if (Output == dm_hdmi) {
3905 if (Format == dm_n422) {
3907 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3910 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
/*
 * Link-limited bpp: LinkBitRate / 10 * 8 * Lanes / PixelClock. With DSC on
 * a dm_dp output the result is additionally scaled by (1 - 2.4 / 100);
 * presumably this accounts for FEC overhead -- TODO confirm.
 */
3914 if (DSCEnable && Output == dm_dp) {
3915 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3917 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
/* ODM combine modes are checked against MaxLinkBPP thresholds of 16 (4to1) and 32 (2to1). */
3920 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3922 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
/* DesiredBPP == 0: no specific bpp was requested, so one is derived from MaxLinkBPP. */
3926 if (DesiredBPP == 0) {
3928 if (MaxLinkBPP < MinDSCBPP) {
3930 } else if (MaxLinkBPP >= MaxDSCBPP) {
/* Round MaxLinkBPP down to a 1/16 bpp step (assuming dml_floor(x, 1.0) floors to an integer). */
3933 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3936 if (MaxLinkBPP >= NonDSCBPP2) {
3938 } else if (MaxLinkBPP >= NonDSCBPP1) {
3940 } else if (MaxLinkBPP >= NonDSCBPP0) {
/*
 * Explicit DesiredBPP: without DSC it must equal NonDSCBPP2 or NonDSCBPP1,
 * or be <= NonDSCBPP0; with DSC it must lie within [MinDSCBPP, MaxDSCBPP].
 * This condition is true when neither holds.
 */
3947 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3948 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3957 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3959 struct vba_vars_st *v = &mode_lib->vba;
3963 int ReorderingBytes;
3964 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3965 bool NoChroma = true;
3966 bool EnoughWritebackUnits = true;
3967 bool P2IWith420 = false;
3968 bool DSCOnlyIfNecessaryWithBPP = false;
3969 bool DSC422NativeNotSupported = false;
3970 double MaxTotalVActiveRDBandwidth;
3971 bool ViewportExceedsSurface = false;
3972 bool FMTBufferExceeded = false;
3974 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3976 CalculateMinAndMaxPrefetchMode(
3977 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3978 &MinPrefetchMode, &MaxPrefetchMode);
3980 /*Scale Ratio, taps Support Check*/
3982 v->ScaleRatioAndTapsSupport = true;
3983 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3984 if (v->ScalerEnabled[k] == false
3985 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3986 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3987 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3988 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3989 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3990 v->ScaleRatioAndTapsSupport = false;
3991 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3992 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3993 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3994 || v->VRatio[k] > v->vtaps[k]
3995 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3996 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3997 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3998 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3999 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
4000 || v->HRatioChroma[k] > v->MaxHSCLRatio
4001 || v->VRatioChroma[k] > v->MaxVSCLRatio
4002 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4003 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4004 v->ScaleRatioAndTapsSupport = false;
4007 /*Source Format, Pixel Format and Scan Support Check*/
4009 v->SourceFormatPixelAndScanSupport = true;
4010 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4011 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4012 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4013 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4014 v->SourceFormatPixelAndScanSupport = false;
4017 /*Bandwidth Support Check*/
4019 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4020 CalculateBytePerPixelAnd256BBlockSizes(
4021 v->SourcePixelFormat[k],
4022 v->SurfaceTiling[k],
4023 &v->BytePerPixelY[k],
4024 &v->BytePerPixelC[k],
4025 &v->BytePerPixelInDETY[k],
4026 &v->BytePerPixelInDETC[k],
4027 &v->Read256BlockHeightY[k],
4028 &v->Read256BlockHeightC[k],
4029 &v->Read256BlockWidthY[k],
4030 &v->Read256BlockWidthC[k]);
4032 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4033 if (v->SourceScan[k] != dm_vert) {
4034 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4035 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4037 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4038 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4041 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4042 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4043 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4044 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4045 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4047 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4048 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4049 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4050 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4051 } else if (v->WritebackEnable[k] == true) {
4052 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4053 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4055 v->WriteBandwidth[k] = 0.0;
4059 /*Writeback Latency support check*/
4061 v->WritebackLatencySupport = true;
4062 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4063 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4064 v->WritebackLatencySupport = false;
4068 /*Writeback Mode Support Check*/
4070 v->TotalNumberOfActiveWriteback = 0;
4071 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4072 if (v->WritebackEnable[k] == true) {
4073 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4077 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4078 EnoughWritebackUnits = false;
4081 /*Writeback Scale Ratio and Taps Support Check*/
4083 v->WritebackScaleRatioAndTapsSupport = true;
4084 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4085 if (v->WritebackEnable[k] == true) {
4086 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4087 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4088 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4089 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4090 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4091 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4092 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4093 v->WritebackScaleRatioAndTapsSupport = false;
4095 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4096 v->WritebackScaleRatioAndTapsSupport = false;
4100 /*Maximum DISPCLK/DPPCLK Support check*/
4102 v->WritebackRequiredDISPCLK = 0.0;
4103 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4104 if (v->WritebackEnable[k] == true) {
4105 v->WritebackRequiredDISPCLK = dml_max(
4106 v->WritebackRequiredDISPCLK,
4107 dml31_CalculateWriteBackDISPCLK(
4108 v->WritebackPixelFormat[k],
4110 v->WritebackHRatio[k],
4111 v->WritebackVRatio[k],
4112 v->WritebackHTaps[k],
4113 v->WritebackVTaps[k],
4114 v->WritebackSourceWidth[k],
4115 v->WritebackDestinationWidth[k],
4117 v->WritebackLineBufferSize));
4120 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4121 if (v->HRatio[k] > 1.0) {
4122 v->PSCL_FACTOR[k] = dml_min(
4123 v->MaxDCHUBToPSCLThroughput,
4124 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4126 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4128 if (v->BytePerPixelC[k] == 0.0) {
4129 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4130 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4132 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4133 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4135 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4136 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4139 if (v->HRatioChroma[k] > 1.0) {
4140 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4141 v->MaxDCHUBToPSCLThroughput,
4142 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4144 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4146 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4148 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4149 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4150 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4151 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4153 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4154 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4155 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4159 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4160 int MaximumSwathWidthSupportLuma;
4161 int MaximumSwathWidthSupportChroma;
4163 if (v->SurfaceTiling[k] == dm_sw_linear) {
4164 MaximumSwathWidthSupportLuma = 8192.0;
4165 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4166 MaximumSwathWidthSupportLuma = 2880.0;
4167 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4168 MaximumSwathWidthSupportLuma = 3840.0;
4170 MaximumSwathWidthSupportLuma = 5760.0;
4173 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4174 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4176 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4178 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4179 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4180 if (v->BytePerPixelC[k] == 0.0) {
4181 v->MaximumSwathWidthInLineBufferChroma = 0;
4183 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4184 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4186 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4187 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4190 CalculateSwathAndDETConfiguration(
4192 v->NumberOfActivePlanes,
4193 v->DETBufferSizeInKByte[0],
4194 v->MaximumSwathWidthLuma,
4195 v->MaximumSwathWidthChroma,
4197 v->SourcePixelFormat,
4205 v->Read256BlockHeightY,
4206 v->Read256BlockHeightC,
4207 v->Read256BlockWidthY,
4208 v->Read256BlockWidthC,
4209 v->odm_combine_dummy,
4210 v->BlendingAndTiming,
4213 v->BytePerPixelInDETY,
4214 v->BytePerPixelInDETC,
4218 v->NoOfDPPThisState,
4219 v->swath_width_luma_ub_this_state,
4220 v->swath_width_chroma_ub_this_state,
4221 v->SwathWidthYThisState,
4222 v->SwathWidthCThisState,
4223 v->SwathHeightYThisState,
4224 v->SwathHeightCThisState,
4225 v->DETBufferSizeYThisState,
4226 v->DETBufferSizeCThisState,
4227 v->SingleDPPViewportSizeSupportPerPlane,
4228 &v->ViewportSizeSupport[0][0]);
4230 for (i = 0; i < v->soc.num_states; i++) {
4231 for (j = 0; j < 2; j++) {
4232 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4233 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4234 v->RequiredDISPCLK[i][j] = 0.0;
4235 v->DISPCLK_DPPCLK_Support[i][j] = true;
4236 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4237 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4238 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4239 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4240 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4241 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4242 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4243 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4245 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4246 * (1 + v->DISPCLKRampingMargin / 100.0);
4247 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4248 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4249 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4250 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4251 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4253 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4254 * (1 + v->DISPCLKRampingMargin / 100.0);
4255 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4256 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4257 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4258 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4259 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4262 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4263 || !(v->Output[k] == dm_dp ||
4264 v->Output[k] == dm_edp)) {
4265 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4266 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4268 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4269 FMTBufferExceeded = true;
4270 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4271 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4272 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4273 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4274 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4275 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4276 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4277 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4278 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4279 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4281 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4282 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4284 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4285 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4286 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4287 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4288 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4290 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4291 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4294 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4295 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4296 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4297 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4298 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4300 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4301 FMTBufferExceeded = true;
4303 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4304 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4307 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4308 v->MPCCombine[i][j][k] = false;
4309 v->NoOfDPP[i][j][k] = 4;
4310 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4311 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4312 v->MPCCombine[i][j][k] = false;
4313 v->NoOfDPP[i][j][k] = 2;
4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4315 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4316 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4317 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4318 v->MPCCombine[i][j][k] = false;
4319 v->NoOfDPP[i][j][k] = 1;
4320 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4322 v->MPCCombine[i][j][k] = true;
4323 v->NoOfDPP[i][j][k] = 2;
4324 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4326 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4327 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4328 > v->MaxDppclkRoundedDownToDFSGranularity)
4329 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4330 v->DISPCLK_DPPCLK_Support[i][j] = false;
4333 v->TotalNumberOfActiveDPP[i][j] = 0;
4334 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4335 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4336 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4337 if (v->NoOfDPP[i][j][k] == 1)
4338 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4339 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4340 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4345 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4346 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4347 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4348 double BWOfNonSplitPlaneOfMaximumBandwidth;
4349 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4350 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4351 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4352 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4353 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4354 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4355 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4356 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4359 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4360 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4361 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4362 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4363 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4364 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4365 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4368 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4369 v->RequiredDISPCLK[i][j] = 0.0;
4370 v->DISPCLK_DPPCLK_Support[i][j] = true;
4371 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4372 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4373 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4374 v->MPCCombine[i][j][k] = true;
4375 v->NoOfDPP[i][j][k] = 2;
4376 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4377 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4379 v->MPCCombine[i][j][k] = false;
4380 v->NoOfDPP[i][j][k] = 1;
4381 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4382 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4384 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4385 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4386 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4387 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4389 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4391 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4392 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4393 > v->MaxDppclkRoundedDownToDFSGranularity)
4394 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4395 v->DISPCLK_DPPCLK_Support[i][j] = false;
4398 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4399 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4400 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4403 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4404 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4405 v->DISPCLK_DPPCLK_Support[i][j] = false;
4410 /*Total Available Pipes Support Check*/
4412 for (i = 0; i < v->soc.num_states; i++) {
4413 for (j = 0; j < 2; j++) {
4414 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4415 v->TotalAvailablePipesSupport[i][j] = true;
4417 v->TotalAvailablePipesSupport[i][j] = false;
4421 /*Display IO and DSC Support Check*/
4423 v->NonsupportedDSCInputBPC = false;
4424 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4425 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4426 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4427 v->NonsupportedDSCInputBPC = true;
4431 /*Number Of DSC Slices*/
4432 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4433 if (v->BlendingAndTiming[k] == k) {
4434 if (v->PixelClockBackEnd[k] > 3200) {
4435 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4436 } else if (v->PixelClockBackEnd[k] > 1360) {
4437 v->NumberOfDSCSlices[k] = 8;
4438 } else if (v->PixelClockBackEnd[k] > 680) {
4439 v->NumberOfDSCSlices[k] = 4;
4440 } else if (v->PixelClockBackEnd[k] > 340) {
4441 v->NumberOfDSCSlices[k] = 2;
4443 v->NumberOfDSCSlices[k] = 1;
4446 v->NumberOfDSCSlices[k] = 0;
4450 for (i = 0; i < v->soc.num_states; i++) {
4451 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4452 v->RequiresDSC[i][k] = false;
4453 v->RequiresFEC[i][k] = false;
4454 if (v->BlendingAndTiming[k] == k) {
4455 if (v->Output[k] == dm_hdmi) {
4456 v->RequiresDSC[i][k] = false;
4457 v->RequiresFEC[i][k] = false;
4458 v->OutputBppPerState[i][k] = TruncToValidBPP(
4459 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4463 v->PixelClockBackEnd[k],
4464 v->ForcedOutputLinkBPP[k],
4468 v->DSCInputBitPerComponent[k],
4469 v->NumberOfDSCSlices[k],
4470 v->AudioSampleRate[k],
4471 v->AudioSampleLayout[k],
4472 v->ODMCombineEnablePerState[i][k]);
4473 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4474 if (v->DSCEnable[k] == true) {
4475 v->RequiresDSC[i][k] = true;
4476 v->LinkDSCEnable = true;
4477 if (v->Output[k] == dm_dp) {
4478 v->RequiresFEC[i][k] = true;
4480 v->RequiresFEC[i][k] = false;
4483 v->RequiresDSC[i][k] = false;
4484 v->LinkDSCEnable = false;
4485 v->RequiresFEC[i][k] = false;
4488 v->Outbpp = BPP_INVALID;
4489 if (v->PHYCLKPerState[i] >= 270.0) {
4490 v->Outbpp = TruncToValidBPP(
4491 (1.0 - v->Downspreading / 100.0) * 2700,
4492 v->OutputLinkDPLanes[k],
4495 v->PixelClockBackEnd[k],
4496 v->ForcedOutputLinkBPP[k],
4500 v->DSCInputBitPerComponent[k],
4501 v->NumberOfDSCSlices[k],
4502 v->AudioSampleRate[k],
4503 v->AudioSampleLayout[k],
4504 v->ODMCombineEnablePerState[i][k]);
4505 v->OutputBppPerState[i][k] = v->Outbpp;
4506 // TODO: Need some other way to handle this nonsense
4507 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4509 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4510 v->Outbpp = TruncToValidBPP(
4511 (1.0 - v->Downspreading / 100.0) * 5400,
4512 v->OutputLinkDPLanes[k],
4515 v->PixelClockBackEnd[k],
4516 v->ForcedOutputLinkBPP[k],
4520 v->DSCInputBitPerComponent[k],
4521 v->NumberOfDSCSlices[k],
4522 v->AudioSampleRate[k],
4523 v->AudioSampleLayout[k],
4524 v->ODMCombineEnablePerState[i][k]);
4525 v->OutputBppPerState[i][k] = v->Outbpp;
4526 // TODO: Need some other way to handle this nonsense
4527 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4529 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4530 v->Outbpp = TruncToValidBPP(
4531 (1.0 - v->Downspreading / 100.0) * 8100,
4532 v->OutputLinkDPLanes[k],
4535 v->PixelClockBackEnd[k],
4536 v->ForcedOutputLinkBPP[k],
4540 v->DSCInputBitPerComponent[k],
4541 v->NumberOfDSCSlices[k],
4542 v->AudioSampleRate[k],
4543 v->AudioSampleLayout[k],
4544 v->ODMCombineEnablePerState[i][k]);
4545 v->OutputBppPerState[i][k] = v->Outbpp;
4546 // TODO: Need some other way to handle this nonsense
4547 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4549 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4550 v->Outbpp = TruncToValidBPP(
4551 (1.0 - v->Downspreading / 100.0) * 10000,
4555 v->PixelClockBackEnd[k],
4556 v->ForcedOutputLinkBPP[k],
4560 v->DSCInputBitPerComponent[k],
4561 v->NumberOfDSCSlices[k],
4562 v->AudioSampleRate[k],
4563 v->AudioSampleLayout[k],
4564 v->ODMCombineEnablePerState[i][k]);
4565 v->OutputBppPerState[i][k] = v->Outbpp;
4566 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4568 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4569 v->Outbpp = TruncToValidBPP(
4574 v->PixelClockBackEnd[k],
4575 v->ForcedOutputLinkBPP[k],
4579 v->DSCInputBitPerComponent[k],
4580 v->NumberOfDSCSlices[k],
4581 v->AudioSampleRate[k],
4582 v->AudioSampleLayout[k],
4583 v->ODMCombineEnablePerState[i][k]);
4584 v->OutputBppPerState[i][k] = v->Outbpp;
4585 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4589 v->OutputBppPerState[i][k] = 0;
4594 for (i = 0; i < v->soc.num_states; i++) {
4595 v->LinkCapacitySupport[i] = true;
4596 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4597 if (v->BlendingAndTiming[k] == k
4598 && (v->Output[k] == dm_dp ||
4599 v->Output[k] == dm_edp ||
4600 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4601 v->LinkCapacitySupport[i] = false;
4607 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4608 if (v->BlendingAndTiming[k] == k
4609 && (v->Output[k] == dm_dp ||
4610 v->Output[k] == dm_edp ||
4611 v->Output[k] == dm_hdmi)) {
4612 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4615 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4616 && !v->DSC422NativeSupport) {
4617 DSC422NativeNotSupported = true;
4622 for (i = 0; i < v->soc.num_states; ++i) {
4623 v->ODMCombine4To1SupportCheckOK[i] = true;
4624 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4625 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4626 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4627 || v->Output[k] == dm_hdmi)) {
4628 v->ODMCombine4To1SupportCheckOK[i] = false;
4633 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4635 for (i = 0; i < v->soc.num_states; i++) {
4636 v->NotEnoughDSCUnits[i] = false;
4637 v->TotalDSCUnitsRequired = 0.0;
4638 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4639 if (v->RequiresDSC[i][k] == true) {
4640 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4641 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4642 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4643 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4645 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4649 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4650 v->NotEnoughDSCUnits[i] = true;
4653 /*DSC Delay per state*/
4655 for (i = 0; i < v->soc.num_states; i++) {
4656 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4657 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4660 v->BPP = v->OutputBppPerState[i][k];
4662 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4663 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4664 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4665 v->DSCInputBitPerComponent[k],
4667 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4668 v->NumberOfDSCSlices[k],
4670 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4671 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4672 v->DSCDelayPerState[i][k] = 2.0
4673 * (dscceComputeDelay(
4674 v->DSCInputBitPerComponent[k],
4676 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4677 v->NumberOfDSCSlices[k] / 2,
4679 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4681 v->DSCDelayPerState[i][k] = 4.0
4682 * (dscceComputeDelay(
4683 v->DSCInputBitPerComponent[k],
4685 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4686 v->NumberOfDSCSlices[k] / 4,
4688 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4690 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4692 v->DSCDelayPerState[i][k] = 0.0;
4695 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4696 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4697 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4698 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4704 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4706 for (i = 0; i < v->soc.num_states; ++i) {
4707 for (j = 0; j <= 1; ++j) {
4708 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4709 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4710 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4711 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4714 CalculateSwathAndDETConfiguration(
4716 v->NumberOfActivePlanes,
4717 v->DETBufferSizeInKByte[0],
4718 v->MaximumSwathWidthLuma,
4719 v->MaximumSwathWidthChroma,
4721 v->SourcePixelFormat,
4729 v->Read256BlockHeightY,
4730 v->Read256BlockHeightC,
4731 v->Read256BlockWidthY,
4732 v->Read256BlockWidthC,
4733 v->ODMCombineEnableThisState,
4734 v->BlendingAndTiming,
4737 v->BytePerPixelInDETY,
4738 v->BytePerPixelInDETC,
4742 v->NoOfDPPThisState,
4743 v->swath_width_luma_ub_this_state,
4744 v->swath_width_chroma_ub_this_state,
4745 v->SwathWidthYThisState,
4746 v->SwathWidthCThisState,
4747 v->SwathHeightYThisState,
4748 v->SwathHeightCThisState,
4749 v->DETBufferSizeYThisState,
4750 v->DETBufferSizeCThisState,
4752 &v->ViewportSizeSupport[i][j]);
4754 CalculateDCFCLKDeepSleep(
4756 v->NumberOfActivePlanes,
4761 v->SwathWidthYThisState,
4762 v->SwathWidthCThisState,
4763 v->NoOfDPPThisState,
4768 v->PSCL_FACTOR_CHROMA,
4769 v->RequiredDPPCLKThisState,
4770 v->ReadBandwidthLuma,
4771 v->ReadBandwidthChroma,
4773 &v->ProjectedDCFCLKDeepSleep[i][j]);
4775 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4776 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4777 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4778 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4779 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4780 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4781 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4782 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4783 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4788 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4789 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4790 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4793 for (i = 0; i < v->soc.num_states; i++) {
4794 for (j = 0; j < 2; j++) {
4795 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4797 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4798 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4799 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4800 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4801 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4802 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4803 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4804 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4805 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4808 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4810 if (v->DCCEnable[k] == true) {
4811 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4815 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4816 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4817 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4819 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4820 && v->SourceScan[k] != dm_vert) {
4821 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4823 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4825 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4826 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4829 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4832 v->Read256BlockHeightC[k],
4833 v->Read256BlockWidthC[k],
4834 v->SourcePixelFormat[k],
4835 v->SurfaceTiling[k],
4836 v->BytePerPixelC[k],
4838 v->SwathWidthCThisState[k],
4839 v->ViewportHeightChroma[k],
4842 v->HostVMMaxNonCachedPageTableLevels,
4843 v->GPUVMMinPageSize,
4844 v->HostVMMinPageSize,
4845 v->PTEBufferSizeInRequestsForChroma,
4848 &v->MacroTileWidthC[k],
4850 &v->DPTEBytesPerRowC,
4851 &v->PTEBufferSizeNotExceededC[i][j][k],
4853 &v->dpte_row_height_chroma[k],
4857 &v->meta_row_height_chroma[k],
4864 &v->dummyinteger11);
4866 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4871 v->ProgressiveToInterlaceUnitInOPP,
4872 v->SwathHeightCThisState[k],
4873 v->ViewportYStartC[k],
4877 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4878 v->PTEBufferSizeInRequestsForChroma = 0;
4879 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4880 v->MetaRowBytesC = 0.0;
4881 v->DPTEBytesPerRowC = 0.0;
4882 v->PrefetchLinesC[i][j][k] = 0.0;
4883 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4885 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4888 v->Read256BlockHeightY[k],
4889 v->Read256BlockWidthY[k],
4890 v->SourcePixelFormat[k],
4891 v->SurfaceTiling[k],
4892 v->BytePerPixelY[k],
4894 v->SwathWidthYThisState[k],
4895 v->ViewportHeight[k],
4898 v->HostVMMaxNonCachedPageTableLevels,
4899 v->GPUVMMinPageSize,
4900 v->HostVMMinPageSize,
4901 v->PTEBufferSizeInRequestsForLuma,
4903 v->DCCMetaPitchY[k],
4904 &v->MacroTileWidthY[k],
4906 &v->DPTEBytesPerRowY,
4907 &v->PTEBufferSizeNotExceededY[i][j][k],
4909 &v->dpte_row_height[k],
4913 &v->meta_row_height[k],
4915 &v->dpte_group_bytes[k],
4921 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4926 v->ProgressiveToInterlaceUnitInOPP,
4927 v->SwathHeightYThisState[k],
4928 v->ViewportYStartY[k],
4931 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4932 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4933 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4935 CalculateRowBandwidth(
4937 v->SourcePixelFormat[k],
4941 v->HTotal[k] / v->PixelClock[k],
4944 v->meta_row_height[k],
4945 v->meta_row_height_chroma[k],
4946 v->DPTEBytesPerRowY,
4947 v->DPTEBytesPerRowC,
4948 v->dpte_row_height[k],
4949 v->dpte_row_height_chroma[k],
4950 &v->meta_row_bandwidth[i][j][k],
4951 &v->dpte_row_bandwidth[i][j][k]);
4953 v->UrgLatency[i] = CalculateUrgentLatency(
4954 v->UrgentLatencyPixelDataOnly,
4955 v->UrgentLatencyPixelMixedWithVMData,
4956 v->UrgentLatencyVMDataOnly,
4957 v->DoUrgentLatencyAdjustment,
4958 v->UrgentLatencyAdjustmentFabricClockComponent,
4959 v->UrgentLatencyAdjustmentFabricClockReference,
4960 v->FabricClockPerState[i]);
4962 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4963 CalculateUrgentBurstFactor(
4964 v->swath_width_luma_ub_this_state[k],
4965 v->swath_width_chroma_ub_this_state[k],
4966 v->SwathHeightYThisState[k],
4967 v->SwathHeightCThisState[k],
4968 v->HTotal[k] / v->PixelClock[k],
4970 v->CursorBufferSize,
4971 v->CursorWidth[k][0],
4975 v->BytePerPixelInDETY[k],
4976 v->BytePerPixelInDETC[k],
4977 v->DETBufferSizeYThisState[k],
4978 v->DETBufferSizeCThisState[k],
4979 &v->UrgentBurstFactorCursor[k],
4980 &v->UrgentBurstFactorLuma[k],
4981 &v->UrgentBurstFactorChroma[k],
4982 &NotUrgentLatencyHiding[k]);
4985 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4986 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4987 if (NotUrgentLatencyHiding[k]) {
4988 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4993 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4994 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4995 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4998 v->TotalVActivePixelBandwidth[i][j] = 0;
4999 v->TotalVActiveCursorBandwidth[i][j] = 0;
5000 v->TotalMetaRowBandwidth[i][j] = 0;
5001 v->TotalDPTERowBandwidth[i][j] = 0;
5002 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5003 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5004 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5005 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5006 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5011 //Calculate Return BW
5012 for (i = 0; i < v->soc.num_states; ++i) {
5013 for (j = 0; j <= 1; ++j) {
5014 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5015 if (v->BlendingAndTiming[k] == k) {
5016 if (v->WritebackEnable[k] == true) {
5017 v->WritebackDelayTime[k] = v->WritebackLatency
5018 + CalculateWriteBackDelay(
5019 v->WritebackPixelFormat[k],
5020 v->WritebackHRatio[k],
5021 v->WritebackVRatio[k],
5022 v->WritebackVTaps[k],
5023 v->WritebackDestinationWidth[k],
5024 v->WritebackDestinationHeight[k],
5025 v->WritebackSourceHeight[k],
5026 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5028 v->WritebackDelayTime[k] = 0.0;
5030 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5031 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5032 v->WritebackDelayTime[k] = dml_max(
5033 v->WritebackDelayTime[k],
5035 + CalculateWriteBackDelay(
5036 v->WritebackPixelFormat[m],
5037 v->WritebackHRatio[m],
5038 v->WritebackVRatio[m],
5039 v->WritebackVTaps[m],
5040 v->WritebackDestinationWidth[m],
5041 v->WritebackDestinationHeight[m],
5042 v->WritebackSourceHeight[m],
5043 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5048 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5049 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5050 if (v->BlendingAndTiming[k] == m) {
5051 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5055 v->MaxMaxVStartup[i][j] = 0;
5056 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5057 v->MaximumVStartup[i][j][k] =
5058 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5059 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5060 v->VTotal[k] - v->VActive[k]
5064 1.0 * v->WritebackDelayTime[k]
5066 / v->PixelClock[k]),
5068 if (v->MaximumVStartup[i][j][k] > 1023)
5069 v->MaximumVStartup[i][j][k] = 1023;
5070 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5075 ReorderingBytes = v->NumberOfChannels
5077 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5078 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5079 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5081 for (i = 0; i < v->soc.num_states; ++i) {
5082 for (j = 0; j <= 1; ++j) {
5083 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5087 if (v->UseMinimumRequiredDCFCLK == true) {
5090 v->MaxInterDCNTileRepeaters,
5092 v->DRAMClockChangeLatency,
5093 v->SREnterPlusExitTime,
5095 v->RoundTripPingLatencyCycles,
5097 v->PixelChunkSizeInKByte,
5100 v->GPUVMMaxPageTableLevels,
5102 v->NumberOfActivePlanes,
5103 v->HostVMMinPageSize,
5104 v->HostVMMaxNonCachedPageTableLevels,
5105 v->DynamicMetadataVMEnabled,
5106 v->ImmediateFlipRequirement,
5107 v->ProgressiveToInterlaceUnitInOPP,
5108 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
5109 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
5112 v->DynamicMetadataTransmittedBytes,
5113 v->DynamicMetadataLinesBeforeActiveRequired,
5119 v->ProjectedDCFCLKDeepSleep,
5121 v->TotalVActivePixelBandwidth,
5122 v->TotalVActiveCursorBandwidth,
5123 v->TotalMetaRowBandwidth,
5124 v->TotalDPTERowBandwidth,
5125 v->TotalNumberOfActiveDPP,
5126 v->TotalNumberOfDCCActiveDPP,
5127 v->dpte_group_bytes,
5130 v->swath_width_luma_ub_all_states,
5131 v->swath_width_chroma_ub_all_states,
5136 v->PDEAndMetaPTEBytesPerFrame,
5139 v->DynamicMetadataEnable,
5140 v->VActivePixelBandwidth,
5141 v->VActiveCursorBandwidth,
5142 v->ReadBandwidthLuma,
5143 v->ReadBandwidthChroma,
5148 for (i = 0; i < v->soc.num_states; ++i) {
5149 for (j = 0; j <= 1; ++j) {
5150 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5151 v->ReturnBusWidth * v->DCFCLKState[i][j],
5152 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5153 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5154 double PixelDataOnlyReturnBWPerState = dml_min(
5155 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5156 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5157 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5158 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5159 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5161 if (v->HostVMEnable != true) {
5162 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5164 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5169 //Re-ordering Buffer Support Check
5170 for (i = 0; i < v->soc.num_states; ++i) {
5171 for (j = 0; j <= 1; ++j) {
5172 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5173 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5174 v->ROBSupport[i][j] = true;
5176 v->ROBSupport[i][j] = false;
5181 //Vertical Active BW support check
5183 MaxTotalVActiveRDBandwidth = 0;
5184 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5185 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5188 for (i = 0; i < v->soc.num_states; ++i) {
5189 for (j = 0; j <= 1; ++j) {
5190 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5192 v->ReturnBusWidth * v->DCFCLKState[i][j],
5193 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5194 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5195 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5196 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5198 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5199 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5201 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5206 v->UrgentLatency = CalculateUrgentLatency(
5207 v->UrgentLatencyPixelDataOnly,
5208 v->UrgentLatencyPixelMixedWithVMData,
5209 v->UrgentLatencyVMDataOnly,
5210 v->DoUrgentLatencyAdjustment,
5211 v->UrgentLatencyAdjustmentFabricClockComponent,
5212 v->UrgentLatencyAdjustmentFabricClockReference,
5215 for (i = 0; i < v->soc.num_states; ++i) {
5216 for (j = 0; j <= 1; ++j) {
5217 double VMDataOnlyReturnBWPerState;
5218 double HostVMInefficiencyFactor = 1;
5219 int NextPrefetchModeState = MinPrefetchMode;
5220 bool UnboundedRequestEnabledThisState = false;
5221 int CompressedBufferSizeInkByteThisState = 0;
5224 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5226 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5227 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5228 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5229 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5232 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5233 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5234 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5235 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5236 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5237 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5238 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5239 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5240 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5241 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5244 VMDataOnlyReturnBWPerState = dml_min(
5246 v->ReturnBusWidth * v->DCFCLKState[i][j],
5247 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5248 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5249 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5250 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5251 if (v->GPUVMEnable && v->HostVMEnable)
5252 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5254 v->ExtraLatency = CalculateExtraLatency(
5255 v->RoundTripPingLatencyCycles,
5257 v->DCFCLKState[i][j],
5258 v->TotalNumberOfActiveDPP[i][j],
5259 v->PixelChunkSizeInKByte,
5260 v->TotalNumberOfDCCActiveDPP[i][j],
5262 v->ReturnBWPerState[i][j],
5265 v->NumberOfActivePlanes,
5266 v->NoOfDPPThisState,
5267 v->dpte_group_bytes,
5268 HostVMInefficiencyFactor,
5269 v->HostVMMinPageSize,
5270 v->HostVMMaxNonCachedPageTableLevels);
5272 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5274 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5275 v->MaxVStartup = v->NextMaxVStartup;
5277 v->TWait = CalculateTWait(
5278 v->PrefetchModePerState[i][j],
5279 v->DRAMClockChangeLatency,
5281 v->SREnterPlusExitTime);
5283 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5286 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
5287 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
5288 myPipe.PixelClock = v->PixelClock[k];
5289 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
5290 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
5291 myPipe.ScalerEnabled = v->ScalerEnabled[k];
5292 myPipe.SourceScan = v->SourceScan[k];
5293 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
5294 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
5295 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
5296 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
5297 myPipe.InterlaceEnable = v->Interlace[k];
5298 myPipe.NumberOfCursors = v->NumberOfCursors[k];
5299 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
5300 myPipe.HTotal = v->HTotal[k];
5301 myPipe.DCCEnable = v->DCCEnable[k];
5302 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
5303 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
5304 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
5305 myPipe.BytePerPixelY = v->BytePerPixelY[k];
5306 myPipe.BytePerPixelC = v->BytePerPixelC[k];
5307 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
5308 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
5310 HostVMInefficiencyFactor,
5312 v->DSCDelayPerState[i][k],
5313 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
5315 v->DPPCLKDelaySCLLBOnly,
5316 v->DPPCLKDelayCNVCCursor,
5317 v->DISPCLKDelaySubtotal,
5318 v->SwathWidthYThisState[k] / v->HRatio[k],
5320 v->MaxInterDCNTileRepeaters,
5321 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
5322 v->MaximumVStartup[i][j][k],
5323 v->GPUVMMaxPageTableLevels,
5326 v->HostVMMaxNonCachedPageTableLevels,
5327 v->HostVMMinPageSize,
5328 v->DynamicMetadataEnable[k],
5329 v->DynamicMetadataVMEnabled,
5330 v->DynamicMetadataLinesBeforeActiveRequired[k],
5331 v->DynamicMetadataTransmittedBytes[k],
5335 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5336 v->MetaRowBytes[i][j][k],
5337 v->DPTEBytesPerRow[i][j][k],
5338 v->PrefetchLinesY[i][j][k],
5339 v->SwathWidthYThisState[k],
5342 v->PrefetchLinesC[i][j][k],
5343 v->SwathWidthCThisState[k],
5346 v->swath_width_luma_ub_this_state[k],
5347 v->swath_width_chroma_ub_this_state[k],
5348 v->SwathHeightYThisState[k],
5349 v->SwathHeightCThisState[k],
5351 &v->DSTXAfterScaler[k],
5352 &v->DSTYAfterScaler[k],
5353 &v->LineTimesForPrefetch[k],
5355 &v->LinesForMetaPTE[k],
5356 &v->LinesForMetaAndDPTERow[k],
5357 &v->VRatioPreY[i][j][k],
5358 &v->VRatioPreC[i][j][k],
5359 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
5360 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
5361 &v->NoTimeForDynamicMetadata[i][j][k],
5363 &v->prefetch_vmrow_bw[k],
5367 &v->VUpdateOffsetPix[k],
5368 &v->VUpdateWidthPix[k],
5369 &v->VReadyOffsetPix[k]);
5372 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5373 CalculateUrgentBurstFactor(
5374 v->swath_width_luma_ub_this_state[k],
5375 v->swath_width_chroma_ub_this_state[k],
5376 v->SwathHeightYThisState[k],
5377 v->SwathHeightCThisState[k],
5378 v->HTotal[k] / v->PixelClock[k],
5380 v->CursorBufferSize,
5381 v->CursorWidth[k][0],
5383 v->VRatioPreY[i][j][k],
5384 v->VRatioPreC[i][j][k],
5385 v->BytePerPixelInDETY[k],
5386 v->BytePerPixelInDETC[k],
5387 v->DETBufferSizeYThisState[k],
5388 v->DETBufferSizeCThisState[k],
5389 &v->UrgentBurstFactorCursorPre[k],
5390 &v->UrgentBurstFactorLumaPre[k],
5391 &v->UrgentBurstFactorChroma[k],
5392 &v->NotUrgentLatencyHidingPre[k]);
5395 v->MaximumReadBandwidthWithPrefetch = 0.0;
5396 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5397 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5398 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5400 v->MaximumReadBandwidthWithPrefetch =
5401 v->MaximumReadBandwidthWithPrefetch
5403 v->VActivePixelBandwidth[i][j][k],
5404 v->VActiveCursorBandwidth[i][j][k]
5405 + v->NoOfDPP[i][j][k]
5406 * (v->meta_row_bandwidth[i][j][k]
5407 + v->dpte_row_bandwidth[i][j][k]),
5408 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5410 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5411 * v->UrgentBurstFactorLumaPre[k]
5412 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5413 * v->UrgentBurstFactorChromaPre[k])
5414 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5417 v->NotEnoughUrgentLatencyHidingPre = false;
5418 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5419 if (v->NotUrgentLatencyHidingPre[k] == true) {
5420 v->NotEnoughUrgentLatencyHidingPre = true;
5424 v->PrefetchSupported[i][j] = true;
5425 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5426 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5427 v->PrefetchSupported[i][j] = false;
5429 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5430 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5431 || v->NoTimeForPrefetch[i][j][k] == true) {
5432 v->PrefetchSupported[i][j] = false;
5436 v->DynamicMetadataSupported[i][j] = true;
5437 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5438 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5439 v->DynamicMetadataSupported[i][j] = false;
5443 v->VRatioInPrefetchSupported[i][j] = true;
5444 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5445 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5446 v->VRatioInPrefetchSupported[i][j] = false;
5449 v->AnyLinesForVMOrRowTooLarge = false;
5450 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5451 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5452 v->AnyLinesForVMOrRowTooLarge = true;
5456 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5458 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5459 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5460 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5461 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5463 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5465 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5466 * v->UrgentBurstFactorLumaPre[k]
5467 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5468 * v->UrgentBurstFactorChromaPre[k])
5469 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5471 v->TotImmediateFlipBytes = 0.0;
5472 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5473 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5474 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5475 + v->DPTEBytesPerRow[i][j][k];
5478 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5479 CalculateFlipSchedule(
5481 HostVMInefficiencyFactor,
5484 v->GPUVMMaxPageTableLevels,
5486 v->HostVMMaxNonCachedPageTableLevels,
5488 v->HostVMMinPageSize,
5489 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5490 v->MetaRowBytes[i][j][k],
5491 v->DPTEBytesPerRow[i][j][k],
5492 v->BandwidthAvailableForImmediateFlip,
5493 v->TotImmediateFlipBytes,
5494 v->SourcePixelFormat[k],
5495 v->HTotal[k] / v->PixelClock[k],
5500 v->dpte_row_height[k],
5501 v->meta_row_height[k],
5502 v->dpte_row_height_chroma[k],
5503 v->meta_row_height_chroma[k],
5504 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5505 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5506 &v->final_flip_bw[k],
5507 &v->ImmediateFlipSupportedForPipe[k]);
5509 v->total_dcn_read_bw_with_flip = 0.0;
5510 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5511 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5513 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5514 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5515 + v->VActiveCursorBandwidth[i][j][k],
5517 * (v->final_flip_bw[k]
5518 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5519 * v->UrgentBurstFactorLumaPre[k]
5520 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5521 * v->UrgentBurstFactorChromaPre[k])
5522 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5524 v->ImmediateFlipSupportedForState[i][j] = true;
5525 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5526 v->ImmediateFlipSupportedForState[i][j] = false;
5528 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5529 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5530 v->ImmediateFlipSupportedForState[i][j] = false;
5534 v->ImmediateFlipSupportedForState[i][j] = false;
5537 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5538 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5539 NextPrefetchModeState = NextPrefetchModeState + 1;
5541 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5543 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5544 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5545 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5546 || v->ImmediateFlipSupportedForState[i][j] == true))
5547 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5549 CalculateUnboundedRequestAndCompressedBufferSize(
5550 v->DETBufferSizeInKByte[0],
5551 v->ConfigReturnBufferSizeInKByte,
5552 v->UseUnboundedRequesting,
5553 v->TotalNumberOfActiveDPP[i][j],
5556 v->CompressedBufferSegmentSizeInkByte,
5558 &UnboundedRequestEnabledThisState,
5559 &CompressedBufferSizeInkByteThisState);
5561 CalculateWatermarksAndDRAMSpeedChangeSupport(
5563 v->PrefetchModePerState[i][j],
5564 v->NumberOfActivePlanes,
5565 v->MaxLineBufferLines,
5567 v->WritebackInterfaceBufferSize,
5568 v->DCFCLKState[i][j],
5569 v->ReturnBWPerState[i][j],
5570 v->SynchronizedVBlank,
5571 v->dpte_group_bytes,
5575 v->WritebackLatency,
5576 v->WritebackChunkSize,
5577 v->SOCCLKPerState[i],
5578 v->DRAMClockChangeLatency,
5580 v->SREnterPlusExitTime,
5582 v->SREnterPlusExitZ8Time,
5583 v->ProjectedDCFCLKDeepSleep[i][j],
5584 v->DETBufferSizeYThisState,
5585 v->DETBufferSizeCThisState,
5586 v->SwathHeightYThisState,
5587 v->SwathHeightCThisState,
5589 v->SwathWidthYThisState,
5590 v->SwathWidthCThisState,
5599 v->BlendingAndTiming,
5600 v->NoOfDPPThisState,
5601 v->BytePerPixelInDETY,
5602 v->BytePerPixelInDETC,
5606 v->WritebackPixelFormat,
5607 v->WritebackDestinationWidth,
5608 v->WritebackDestinationHeight,
5609 v->WritebackSourceHeight,
5610 UnboundedRequestEnabledThisState,
5611 CompressedBufferSizeInkByteThisState,
5612 &v->DRAMClockChangeSupport[i][j],
5613 &v->UrgentWatermark,
5614 &v->WritebackUrgentWatermark,
5615 &v->DRAMClockChangeWatermark,
5616 &v->WritebackDRAMClockChangeWatermark,
5621 &v->MinActiveDRAMClockChangeLatencySupported);
5625 /*PTE Buffer Size Check*/
5626 for (i = 0; i < v->soc.num_states; i++) {
5627 for (j = 0; j < 2; j++) {
5628 v->PTEBufferSizeNotExceeded[i][j] = true;
5629 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5630 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5631 v->PTEBufferSizeNotExceeded[i][j] = false;
5637 /*Cursor Support Check*/
5638 v->CursorSupport = true;
5639 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5640 if (v->CursorWidth[k][0] > 0.0) {
5641 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5642 v->CursorSupport = false;
5647 /*Valid Pitch Check*/
5648 v->PitchSupport = true;
5649 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5650 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5651 if (v->DCCEnable[k] == true) {
5652 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5654 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5656 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5657 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5658 && v->SourcePixelFormat[k] != dm_mono_8) {
5659 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5660 if (v->DCCEnable[k] == true) {
5661 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5662 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5663 64.0 * v->Read256BlockWidthC[k]);
5665 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5668 v->AlignedCPitch[k] = v->PitchC[k];
5669 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5671 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5672 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5673 v->PitchSupport = false;
5677 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5678 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5679 ViewportExceedsSurface = true;
5680 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5681 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5682 && v->SourcePixelFormat[k] != dm_rgbe) {
5683 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5684 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5685 ViewportExceedsSurface = true;
5691 /*Mode Support, Voltage State and SOC Configuration*/
5692 for (i = v->soc.num_states - 1; i >= 0; i--) {
5693 for (j = 0; j < 2; j++) {
5694 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5695 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5696 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5697 && v->DTBCLKRequiredMoreThanSupported[i] == false
5698 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5699 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5700 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5701 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5702 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5703 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5704 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5705 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required)
5706 || v->ImmediateFlipSupportedForState[i][j] == true)
5707 && FMTBufferExceeded == false) {
5708 v->ModeSupport[i][j] = true;
5710 v->ModeSupport[i][j] = false;
5716 unsigned int MaximumMPCCombine = 0;
5717 for (i = v->soc.num_states; i >= 0; i--) {
5718 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5719 v->VoltageLevel = i;
5720 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5721 if (v->ModeSupport[i][0] == true) {
5722 MaximumMPCCombine = 0;
5724 MaximumMPCCombine = 1;
5728 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5729 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5730 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5731 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5733 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5734 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5735 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5736 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5737 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5738 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport - derive the urgent, DRAM
 * clock change, writeback and stutter watermarks, and classify how a DRAM
 * clock change can be supported.
 *
 * Results are returned through the pointer parameters at the end of the
 * argument list. The function also updates these mode_lib->vba fields:
 * TotalActiveWriteback, LBLatencyHidingSourceLinesY/C,
 * ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin and
 * TotalNumberOfActiveOTG.
 *
 * NOTE(review): this listing appears to have lost lines (closing braces,
 * else branches, and the declarations of names the body uses, e.g. SOCCLK,
 * SRExitTime, HRatio, VRatio, k, j, LinesInDETC). Confirm against the
 * upstream file before relying on the exact branch structure.
 */
5742 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5743 struct display_mode_lib *mode_lib,
5744 unsigned int PrefetchMode,
5745 unsigned int NumberOfActivePlanes,
5746 unsigned int MaxLineBufferLines,
5747 unsigned int LineBufferSize,
5748 unsigned int WritebackInterfaceBufferSize,
5751 bool SynchronizedVBlank,
5752 unsigned int dpte_group_bytes[],
5753 unsigned int MetaChunkSize,
5754 double UrgentLatency,
5755 double ExtraLatency,
5756 double WritebackLatency,
5757 double WritebackChunkSize,
5759 double DRAMClockChangeLatency,
5761 double SREnterPlusExitTime,
5762 double SRExitZ8Time,
5763 double SREnterPlusExitZ8Time,
5764 double DCFCLKDeepSleep,
/* Per-plane inputs, indexed by k in [0, NumberOfActivePlanes). */
5765 unsigned int DETBufferSizeY[],
5766 unsigned int DETBufferSizeC[],
5767 unsigned int SwathHeightY[],
5768 unsigned int SwathHeightC[],
5769 unsigned int LBBitPerPixel[],
5770 double SwathWidthY[],
5771 double SwathWidthC[],
5773 double HRatioChroma[],
5774 unsigned int vtaps[],
5775 unsigned int VTAPsChroma[],
5777 double VRatioChroma[],
5778 unsigned int HTotal[],
5779 double PixelClock[],
5780 unsigned int BlendingAndTiming[],
5781 unsigned int DPPPerPlane[],
5782 double BytePerPixelDETY[],
5783 double BytePerPixelDETC[],
5784 double DSTXAfterScaler[],
5785 double DSTYAfterScaler[],
5786 bool WritebackEnable[],
5787 enum source_format_class WritebackPixelFormat[],
5788 double WritebackDestinationWidth[],
5789 double WritebackDestinationHeight[],
5790 double WritebackSourceHeight[],
5791 bool UnboundedRequestEnabled,
5792 int unsigned CompressedBufferSizeInkByte,
/* Outputs: every pointer below is written by the body. */
5793 enum clock_change_support *DRAMClockChangeSupport,
5794 double *UrgentWatermark,
5795 double *WritebackUrgentWatermark,
5796 double *DRAMClockChangeWatermark,
5797 double *WritebackDRAMClockChangeWatermark,
5798 double *StutterExitWatermark,
5799 double *StutterEnterPlusExitWatermark,
5800 double *Z8StutterExitWatermark,
5801 double *Z8StutterEnterPlusExitWatermark,
5802 double *MinActiveDRAMClockChangeLatencySupported)
5804 struct vba_vars_st *v = &mode_lib->vba;
5805 double EffectiveLBLatencyHidingY;
5806 double EffectiveLBLatencyHidingC;
5807 double LinesInDETY[DC__NUM_DPP__MAX];
5809 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5810 unsigned int LinesInDETCRoundedDownToSwath;
5811 double FullDETBufferingTimeY;
5812 double FullDETBufferingTimeC;
5813 double ActiveDRAMClockChangeLatencyMarginY;
5814 double ActiveDRAMClockChangeLatencyMarginC;
5815 double WritebackDRAMClockChangeLatencyMargin;
5816 double PlaneWithMinActiveDRAMClockChangeMargin;
5817 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5818 double WritebackDRAMClockChangeLatencyHiding;
5819 double TotalPixelBW = 0.0;
/* Urgent watermark is the urgent latency plus the extra latency. */
5822 *UrgentWatermark = UrgentLatency + ExtraLatency;
5824 #ifdef __DML_VBA_DEBUG__
5825 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5826 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5827 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
/* The DRAM clock change watermark stacks the clock change latency on top of the urgent watermark. */
5830 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5832 #ifdef __DML_VBA_DEBUG__
5833 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5834 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
/* Count the planes that have writeback enabled. */
5837 v->TotalActiveWriteback = 0;
5838 for (k = 0; k < NumberOfActivePlanes; ++k) {
5839 if (WritebackEnable[k] == true) {
5840 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
/*
 * Writeback watermarks. With at most one active writeback only the latency
 * terms are used; the second assignment of each pair (presumably the else
 * branch) adds WritebackChunkSize * 1024 / 32 / SOCCLK.
 */
5844 if (v->TotalActiveWriteback <= 1) {
5845 *WritebackUrgentWatermark = WritebackLatency;
5847 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5850 if (v->TotalActiveWriteback <= 1) {
5851 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5853 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
/*
 * Accumulate over all planes: DPP count times (luma + chroma bytes per
 * swath line scaled by the vertical ratios), divided by the line time
 * HTotal / PixelClock.
 */
5856 for (k = 0; k < NumberOfActivePlanes; ++k) {
5857 TotalPixelBW = TotalPixelBW
5858 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5859 / (HTotal[k] / PixelClock[k]);
/* Per-plane DRAM clock change latency margin. */
5862 for (k = 0; k < NumberOfActivePlanes; ++k) {
5863 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
/*
 * Source lines available in the line buffer: capped at MaxLineBufferLines,
 * then reduced by (taps - 1) for luma and chroma respectively.
 */
5865 v->LBLatencyHidingSourceLinesY = dml_min(
5866 (double) MaxLineBufferLines,
5867 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5869 v->LBLatencyHidingSourceLinesC = dml_min(
5870 (double) MaxLineBufferLines,
5871 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
/* Convert those source lines to time: lines / vertical ratio * line time. */
5873 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5875 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
/*
 * With unbounded requests, the luma DET size is grown by a share of the
 * compressed buffer proportional to this plane's luma bandwidth over
 * TotalPixelBW.
 */
5877 if (UnboundedRequestEnabled) {
5878 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5879 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
/*
 * Lines held in the DET, floored with the swath height as granularity
 * (presumably rounding down to whole swaths, as the variable name says),
 * then converted to time.
 */
5882 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5883 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5884 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5885 if (BytePerPixelDETC[k] > 0) {
/*
 * NOTE(review): chroma reads v->DETBufferSizeC[k] while luma uses the
 * DETBufferSizeY[] parameter; confirm this is intended when the caller
 * passes a different array for DETBufferSizeC[].
 */
5886 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5887 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5888 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
/* Large sentinel value; presumably the branch for planes without chroma. */
5891 FullDETBufferingTimeC = 999999;
/*
 * Luma margin: line buffer hiding plus DET buffering time, minus the time
 * for the DSTX/DSTY after-scaler offset, minus both watermarks.
 */
5894 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5895 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
/* With several planes, subtract (1 - 1/N) of one swath's time. */
5897 if (NumberOfActivePlanes > 1) {
5898 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5899 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
/* Same computation for chroma; the plane margin is the smaller of the two. */
5902 if (BytePerPixelDETC[k] > 0) {
5903 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5904 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5906 if (NumberOfActivePlanes > 1) {
5907 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5908 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5910 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5912 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
/*
 * Writeback can lower the plane margin further: hiding time is the
 * interface buffer size divided by the writeback data rate, halved for
 * dm_444_64.
 */
5915 if (WritebackEnable[k] == true) {
5916 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5917 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5918 if (WritebackPixelFormat[k] == dm_444_64) {
5919 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
/*
 * NOTE(review): this reads v->WritebackDRAMClockChangeWatermark, not the
 * *WritebackDRAMClockChangeWatermark output written above; the two only
 * agree if the caller passes the address of that vba field.
 */
5921 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5922 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
/*
 * Find the smallest margin and remember which plane owns it, resolved
 * through BlendingAndTiming[] to the plane index it points at.
 */
5926 v->MinActiveDRAMClockChangeMargin = 999999;
5927 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5928 for (k = 0; k < NumberOfActivePlanes; ++k) {
5929 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5930 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5931 if (BlendingAndTiming[k] == k) {
5932 PlaneWithMinActiveDRAMClockChangeMargin = k;
5934 for (j = 0; j < NumberOfActivePlanes; ++j) {
5935 if (BlendingAndTiming[k] == j) {
5936 PlaneWithMinActiveDRAMClockChangeMargin = j;
5943 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
/*
 * Smallest margin among the planes that are neither the plane found above
 * nor blended onto it.
 */
5945 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5946 for (k = 0; k < NumberOfActivePlanes; ++k) {
5947 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5948 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5949 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
/* A plane with BlendingAndTiming[k] == k is counted as one active OTG. */
5953 v->TotalNumberOfActiveOTG = 0;
5955 for (k = 0; k < NumberOfActivePlanes; ++k) {
5956 if (BlendingAndTiming[k] == k) {
5957 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
/*
 * Classification (PrefetchMode must be 0 for either supported result):
 * positive minimum margin -> vactive; otherwise synchronized vblank, a
 * single OTG, or a positive second-minimum margin -> vblank. The last
 * assignment is presumably the final else: unsupported.
 */
5961 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5962 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5963 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5964 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5965 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5967 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
/* Stutter watermarks: the relevant exit/enter time + ExtraLatency + 10 / DCFCLKDeepSleep. */
5970 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5971 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5972 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5973 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5975 #ifdef __DML_VBA_DEBUG__
5976 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5977 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5978 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5979 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep - compute the value stored in *DCFCLKDeepSleep.
 *
 * A per-plane value is stored in mode_lib->vba.DCFCLKDeepSleepPerPlane[k];
 * the result is the maximum of those values and a term derived from the
 * total read bandwidth, with a lower bound of 8.0.
 *
 * NOTE(review): this listing appears to have lost lines (else branches,
 * closing braces and the declarations of HRatio, VRatio, DPPCLK,
 * ReturnBusWidth and k, which the body uses). Confirm against upstream.
 */
5983 static void CalculateDCFCLKDeepSleep(
5984 struct display_mode_lib *mode_lib,
5985 unsigned int NumberOfActivePlanes,
5986 int BytePerPixelY[],
5987 int BytePerPixelC[],
5989 double VRatioChroma[],
5990 double SwathWidthY[],
5991 double SwathWidthC[],
5992 unsigned int DPPPerPlane[],
5994 double HRatioChroma[],
5995 double PixelClock[],
5996 double PSCL_THROUGHPUT[],
5997 double PSCL_THROUGHPUT_CHROMA[],
5999 double ReadBandwidthLuma[],
6000 double ReadBandwidthChroma[],
/* Output. */
6002 double *DCFCLKDeepSleep)
6004 struct vba_vars_st *v = &mode_lib->vba;
6005 double DisplayPipeLineDeliveryTimeLuma;
6006 double DisplayPipeLineDeliveryTimeChroma;
6007 double ReadBandwidth = 0.0;
6010 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma line delivery time. For VRatio <= 1 it is derived from the pixel
 * clock and horizontal ratio; the second assignment (presumably the else
 * branch) uses the scaler throughput and DPPCLK instead.
 */
6012 if (VRatio[k] <= 1) {
6013 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6015 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: zero without a chroma plane, otherwise the same two formulas with chroma inputs. */
6017 if (BytePerPixelC[k] == 0) {
6018 DisplayPipeLineDeliveryTimeChroma = 0;
6020 if (VRatioChroma[k] <= 1) {
6021 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6023 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Per-plane value: with chroma, the larger of the luma and chroma terms
 * (bytes per line / 32 / delivery time); the luma-only assignment divides
 * by 64 instead.
 */
6027 if (BytePerPixelC[k] > 0) {
6028 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
6029 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
6031 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* Never below PixelClock / 16. */
6033 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Total read bandwidth over all planes. */
6037 for (k = 0; k < NumberOfActivePlanes; ++k) {
6038 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived value, with a lower bound of 8.0. */
6041 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
/* Then raise it to the largest per-plane value. */
6043 for (k = 0; k < NumberOfActivePlanes; ++k) {
6044 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor - compute the urgent burst factors for the
 * cursor, luma and chroma of one plane.
 *
 * For each of the three buffers the body converts the buffered lines to a
 * time T. When T - UrgentLatency <= 0 it sets *NotEnoughUrgentLatencyHiding
 * to 1 and the factor to 0; otherwise the factor is T / (T - UrgentLatency).
 *
 * NOTE(review): this listing appears to have lost lines (the declarations of
 * LineTime and VRatio, closing braces, and the conditions that select the
 * "= 1" fallback assignments). Confirm the branch structure against upstream.
 */
6048 static void CalculateUrgentBurstFactor(
6049 int swath_width_luma_ub,
6050 int swath_width_chroma_ub,
6051 unsigned int SwathHeightY,
6052 unsigned int SwathHeightC,
6054 double UrgentLatency,
6055 double CursorBufferSize,
6056 unsigned int CursorWidth,
6057 unsigned int CursorBPP,
6060 double BytePerPixelInDETY,
6061 double BytePerPixelInDETC,
6062 double DETBufferSizeY,
6063 double DETBufferSizeC,
/* Outputs. */
6064 double *UrgentBurstFactorCursor,
6065 double *UrgentBurstFactorLuma,
6066 double *UrgentBurstFactorChroma,
6067 bool *NotEnoughUrgentLatencyHiding)
6069 double LinesInDETLuma;
6070 double LinesInDETChroma;
6071 unsigned int LinesInCursorBuffer;
6072 double CursorBufferSizeInTime;
6073 double DETBufferSizeInTimeLuma;
6074 double DETBufferSizeInTimeChroma;
/* Cleared here; set to 1 below by any buffer that cannot hide UrgentLatency. */
6076 *NotEnoughUrgentLatencyHiding = 0;
6078 if (CursorWidth > 0) {
/*
 * Cursor lines that fit in the cursor buffer (CursorBufferSize * 1024 bytes
 * over CursorWidth * CursorBPP / 8 bytes per line), taken as 1 shifted left
 * by the floored log2 -- presumably rounding down to a power of two.
 */
6079 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
6081 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
6082 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
6083 *NotEnoughUrgentLatencyHiding = 1;
6084 *UrgentBurstFactorCursor = 0;
6086 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
/* Fallback factor of 1; the condition selecting it is not visible in this listing. */
6089 *UrgentBurstFactorCursor = 1;
/* Luma: lines in the DET, floored with SwathHeightY as granularity, converted to time. */
6093 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
6095 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
6096 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
6097 *NotEnoughUrgentLatencyHiding = 1;
6098 *UrgentBurstFactorLuma = 0;
6100 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
/* Fallback factor of 1; selecting condition not visible here. */
6103 *UrgentBurstFactorLuma = 1;
/* Chroma is only evaluated when the plane has chroma bytes. */
6106 if (BytePerPixelInDETC > 0) {
6107 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/* NOTE(review): chroma time divides by the same VRatio as luma; confirm a chroma ratio is not intended. */
6109 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
6110 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
6111 *NotEnoughUrgentLatencyHiding = 1;
6112 *UrgentBurstFactorChroma = 0;
6114 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
/* Fallback factor of 1; selecting condition not visible here. */
6117 *UrgentBurstFactorChroma = 1;
6122 static void CalculatePixelDeliveryTimes(
6123 unsigned int NumberOfActivePlanes,
6125 double VRatioChroma[],
6126 double VRatioPrefetchY[],
6127 double VRatioPrefetchC[],
6128 unsigned int swath_width_luma_ub[],
6129 unsigned int swath_width_chroma_ub[],
6130 unsigned int DPPPerPlane[],
6132 double HRatioChroma[],
6133 double PixelClock[],
6134 double PSCL_THROUGHPUT[],
6135 double PSCL_THROUGHPUT_CHROMA[],
6137 int BytePerPixelC[],
6138 enum scan_direction_class SourceScan[],
6139 unsigned int NumberOfCursors[],
6140 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6141 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6142 unsigned int BlockWidth256BytesY[],
6143 unsigned int BlockHeight256BytesY[],
6144 unsigned int BlockWidth256BytesC[],
6145 unsigned int BlockHeight256BytesC[],
6146 double DisplayPipeLineDeliveryTimeLuma[],
6147 double DisplayPipeLineDeliveryTimeChroma[],
6148 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6149 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6150 double DisplayPipeRequestDeliveryTimeLuma[],
6151 double DisplayPipeRequestDeliveryTimeChroma[],
6152 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6153 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6154 double CursorRequestDeliveryTime[],
6155 double CursorRequestDeliveryTimePrefetch[])
6157 double req_per_swath_ub;
6160 for (k = 0; k < NumberOfActivePlanes; ++k) {
6161 if (VRatio[k] <= 1) {
6162 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6164 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6167 if (BytePerPixelC[k] == 0) {
6168 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6170 if (VRatioChroma[k] <= 1) {
6171 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6173 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6177 if (VRatioPrefetchY[k] <= 1) {
6178 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6180 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6183 if (BytePerPixelC[k] == 0) {
6184 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6186 if (VRatioPrefetchC[k] <= 1) {
6187 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6189 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6194 for (k = 0; k < NumberOfActivePlanes; ++k) {
6195 if (SourceScan[k] != dm_vert) {
6196 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6198 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6200 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6201 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6202 if (BytePerPixelC[k] == 0) {
6203 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6204 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6206 if (SourceScan[k] != dm_vert) {
6207 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6209 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6211 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6212 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6214 #ifdef __DML_VBA_DEBUG__
6215 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6216 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6217 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6218 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6219 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6220 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6221 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6222 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6223 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6224 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6225 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6226 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6230 for (k = 0; k < NumberOfActivePlanes; ++k) {
6231 int cursor_req_per_width;
6232 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6233 if (NumberOfCursors[k] > 0) {
6234 if (VRatio[k] <= 1) {
6235 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6237 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6239 if (VRatioPrefetchY[k] <= 1) {
6240 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6242 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6245 CursorRequestDeliveryTime[k] = 0;
6246 CursorRequestDeliveryTimePrefetch[k] = 0;
6248 #ifdef __DML_VBA_DEBUG__
6249 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6250 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6251 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes - per-plane timing of DCC meta chunk and PTE
 * group fetches.
 *
 * Outputs (all arrays indexed by plane k):
 *   DST_Y_PER_PTE_ROW_NOM_{L,C}, DST_Y_PER_META_ROW_NOM_{L,C}
 *       - row height divided by the vertical ratio
 *   TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}
 *       - time per meta chunk; zeroed in the fallback assignments
 *   time_per_pte_group_{nom,vblank,flip}_{luma,chroma}
 *       - time per PTE group; zeroed in the fallback assignments
 *
 * NOTE(review): this listing appears to have lost lines (else branches,
 * closing braces and the declarations of GPUVMEnable, MetaChunkSize, HTotal,
 * VRatio, DCCEnable and k, which the body uses). Confirm against upstream.
 */
6256 static void CalculateMetaAndPTETimes(
6257 int NumberOfActivePlanes,
6260 int MinMetaChunkSizeBytes,
6263 double VRatioChroma[],
6264 double DestinationLinesToRequestRowInVBlank[],
6265 double DestinationLinesToRequestRowInImmediateFlip[],
6267 double PixelClock[],
6268 int BytePerPixelY[],
6269 int BytePerPixelC[],
6270 enum scan_direction_class SourceScan[],
6271 int dpte_row_height[],
6272 int dpte_row_height_chroma[],
6273 int meta_row_width[],
6274 int meta_row_width_chroma[],
6275 int meta_row_height[],
6276 int meta_row_height_chroma[],
6277 int meta_req_width[],
6278 int meta_req_width_chroma[],
6279 int meta_req_height[],
6280 int meta_req_height_chroma[],
6281 int dpte_group_bytes[],
6282 int PTERequestSizeY[],
6283 int PTERequestSizeC[],
6284 int PixelPTEReqWidthY[],
6285 int PixelPTEReqHeightY[],
6286 int PixelPTEReqWidthC[],
6287 int PixelPTEReqHeightC[],
6288 int dpte_row_width_luma_ub[],
6289 int dpte_row_width_chroma_ub[],
/* Outputs. */
6290 double DST_Y_PER_PTE_ROW_NOM_L[],
6291 double DST_Y_PER_PTE_ROW_NOM_C[],
6292 double DST_Y_PER_META_ROW_NOM_L[],
6293 double DST_Y_PER_META_ROW_NOM_C[],
6294 double TimePerMetaChunkNominal[],
6295 double TimePerChromaMetaChunkNominal[],
6296 double TimePerMetaChunkVBlank[],
6297 double TimePerChromaMetaChunkVBlank[],
6298 double TimePerMetaChunkFlip[],
6299 double TimePerChromaMetaChunkFlip[],
6300 double time_per_pte_group_nom_luma[],
6301 double time_per_pte_group_vblank_luma[],
6302 double time_per_pte_group_flip_luma[],
6303 double time_per_pte_group_nom_chroma[],
6304 double time_per_pte_group_vblank_chroma[],
6305 double time_per_pte_group_flip_chroma[])
6307 unsigned int meta_chunk_width;
6308 unsigned int min_meta_chunk_width;
6309 unsigned int meta_chunk_per_row_int;
6310 unsigned int meta_row_remainder;
6311 unsigned int meta_chunk_threshold;
6312 unsigned int meta_chunks_per_row_ub;
6313 unsigned int meta_chunk_width_chroma;
6314 unsigned int min_meta_chunk_width_chroma;
6315 unsigned int meta_chunk_per_row_int_chroma;
6316 unsigned int meta_row_remainder_chroma;
6317 unsigned int meta_chunk_threshold_chroma;
6318 unsigned int meta_chunks_per_row_ub_chroma;
6319 unsigned int dpte_group_width_luma;
6320 unsigned int dpte_groups_per_row_luma_ub;
6321 unsigned int dpte_group_width_chroma;
6322 unsigned int dpte_groups_per_row_chroma_ub;
/*
 * Pass 1: PTE/meta row heights divided by the vertical ratio. Chroma values
 * are 0 for planes with BytePerPixelC == 0.
 */
6325 for (k = 0; k < NumberOfActivePlanes; ++k) {
6326 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6327 if (BytePerPixelC[k] == 0) {
6328 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6330 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6332 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6333 if (BytePerPixelC[k] == 0) {
6334 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6336 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Pass 2: meta chunk timing for planes with DCC enabled. */
6340 for (k = 0; k < NumberOfActivePlanes; ++k) {
6341 if (DCCEnable[k] == true) {
/* Chunk widths derived from the chunk sizes, bytes per pixel and meta row height. */
6342 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6343 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
/* Whole chunks per meta row and the leftover width. */
6344 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6345 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/* Threshold uses the meta request width for non-vertical scan, the request height otherwise. */
6346 if (SourceScan[k] != dm_vert) {
6347 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6349 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Upper bound on chunks per row: +1 when the remainder is within the threshold, +2 in the other assignment. */
6351 if (meta_row_remainder <= meta_chunk_threshold) {
6352 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6354 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Time per chunk = destination lines * line time (HTotal / PixelClock) / chunks per row. */
6356 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6357 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6358 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6359 if (BytePerPixelC[k] == 0) {
6360 TimePerChromaMetaChunkNominal[k] = 0;
6361 TimePerChromaMetaChunkVBlank[k] = 0;
6362 TimePerChromaMetaChunkFlip[k] = 0;
/* Chroma: same steps with the chroma inputs. */
6364 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6365 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6366 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6367 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6368 if (SourceScan[k] != dm_vert) {
6369 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6371 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6373 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6374 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6376 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6378 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6379 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6380 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* All six meta chunk times zeroed; presumably the branch for planes without DCC. */
6383 TimePerMetaChunkNominal[k] = 0;
6384 TimePerMetaChunkVBlank[k] = 0;
6385 TimePerMetaChunkFlip[k] = 0;
6386 TimePerChromaMetaChunkNominal[k] = 0;
6387 TimePerChromaMetaChunkVBlank[k] = 0;
6388 TimePerChromaMetaChunkFlip[k] = 0;
/* Pass 3: PTE group timing when GPUVM is enabled. */
6392 for (k = 0; k < NumberOfActivePlanes; ++k) {
6393 if (GPUVMEnable == true) {
/* Group width = requests per group (dpte_group_bytes / PTERequestSize) times the PTE request width, or height for the other scan direction. */
6394 if (SourceScan[k] != dm_vert) {
6395 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6397 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row, then time per group = destination lines * line time / groups per row. */
6399 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6400 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6401 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6402 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6403 if (BytePerPixelC[k] == 0) {
6404 time_per_pte_group_nom_chroma[k] = 0;
6405 time_per_pte_group_vblank_chroma[k] = 0;
6406 time_per_pte_group_flip_chroma[k] = 0;
6408 if (SourceScan[k] != dm_vert) {
6409 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6411 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6413 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6414 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6415 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6416 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* All six PTE group times zeroed; presumably the branch for GPUVM disabled. */
6419 time_per_pte_group_nom_luma[k] = 0;
6420 time_per_pte_group_vblank_luma[k] = 0;
6421 time_per_pte_group_flip_luma[k] = 0;
6422 time_per_pte_group_nom_chroma[k] = 0;
6423 time_per_pte_group_vblank_chroma[k] = 0;
6424 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - per-plane time per VM group and per VM
 * request, for vblank and for immediate flip.
 *
 * For planes that satisfy the GPUVM condition below, the body counts the VM
 * groups and the 64-byte requests, then divides the available time
 * (destination lines * HTotal / PixelClock) by each count. The four output
 * arrays are zeroed in the fallback assignments at the end.
 *
 * NOTE(review): this listing appears to have lost lines (else branches,
 * closing braces and the declarations of GPUVMEnable, DCCEnable and k, which
 * the body uses). Confirm against upstream.
 */
6429 static void CalculateVMGroupAndRequestTimes(
6430 unsigned int NumberOfActivePlanes,
6432 unsigned int GPUVMMaxPageTableLevels,
6433 unsigned int HTotal[],
6434 int BytePerPixelC[],
6435 double DestinationLinesToRequestVMInVBlank[],
6436 double DestinationLinesToRequestVMInImmediateFlip[],
6438 double PixelClock[],
6439 int dpte_row_width_luma_ub[],
6440 int dpte_row_width_chroma_ub[],
6441 int vm_group_bytes[],
6442 unsigned int dpde0_bytes_per_frame_ub_l[],
6443 unsigned int dpde0_bytes_per_frame_ub_c[],
6444 int meta_pte_bytes_per_frame_ub_l[],
6445 int meta_pte_bytes_per_frame_ub_c[],
/* Outputs. */
6446 double TimePerVMGroupVBlank[],
6447 double TimePerVMGroupFlip[],
6448 double TimePerVMRequestVBlank[],
6449 double TimePerVMRequestFlip[])
6451 int num_group_per_lower_vm_stage;
6452 int num_req_per_lower_vm_stage;
6455 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Only planes with GPUVM enabled and either DCC on or more than one page table level are timed. */
6456 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count: ceil(bytes / vm_group_bytes) summed over the applicable
 * sources. Without DCC only the dpde0 bytes are counted.
 */
6457 if (DCCEnable[k] == false) {
6458 if (BytePerPixelC[k] > 0) {
6459 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6460 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6462 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* With a single page table level only the meta PTE bytes are counted. */
6465 if (GPUVMMaxPageTableLevels == 1) {
6466 if (BytePerPixelC[k] > 0) {
6467 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6468 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6470 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Remaining case: dpde0 and meta PTE groups are both counted, plus a constant 2 (with chroma) or 1 (luma only). */
6473 if (BytePerPixelC[k] > 0) {
6474 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6475 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6476 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6477 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6479 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6480 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/* Request count: the same case split, using bytes / 64 (integer division) for each source. */
6485 if (DCCEnable[k] == false) {
6486 if (BytePerPixelC[k] > 0) {
6487 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6489 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6492 if (GPUVMMaxPageTableLevels == 1) {
6493 if (BytePerPixelC[k] > 0) {
6494 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6496 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6499 if (BytePerPixelC[k] > 0) {
6500 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6501 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6503 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/* Available time (destination lines * HTotal / PixelClock) divided by the group or request count. */
6508 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6509 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6510 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6511 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page table levels every time is halved. */
6513 if (GPUVMMaxPageTableLevels > 2) {
6514 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6515 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6516 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6517 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* All four outputs zeroed; presumably the branch taken when the condition at the top of the loop is false. */
6521 TimePerVMGroupVBlank[k] = 0;
6522 TimePerVMGroupFlip[k] = 0;
6523 TimePerVMRequestVBlank[k] = 0;
6524 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency() - derive the stutter period, the number of
 * stutter bursts per frame and the stutter efficiency (in percent) for both
 * the SRExitTime and the SRExitZ8Time exit latencies.
 *
 * Nothing is returned; results are written through the pointer parameters:
 *   *StutterPeriod
 *       smallest per-plane DET buffering time found in the plane loop
 *   *StutterEfficiencyNotIncludingVBlank
 *       max(0, 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100
 *   *Z8StutterEfficiencyNotIncludingVBlank
 *       same expression with SRExitZ8Time
 *   *NumberOfStutterBurstsPerFrame, *Z8NumberOfStutterBurstsPerFrame
 *       ceil(VActiveTimeCriticalPlane / *StutterPeriod), or 0 when the
 *       matching efficiency is not positive
 *   *StutterEfficiency, *Z8StutterEfficiency
 *       recomputed against FrameTimeCriticalPlane when the vblank condition
 *       near the end of the function holds
 *
 * Outline of the body:
 *   1. Sum compressed, zero-size-request and row read bandwidth over planes.
 *   2. Turn the sums into average compression figures and an effective
 *      compressed buffer size.
 *   3. Find the plane with the smallest DET buffering time (the "critical
 *      plane") and remember its timing values.
 *   4. Compute StutterBurstTime and, from it, the efficiencies.
 *
 * NOTE(review): several identifiers used below (k, LinesInDETY, DCCEnable,
 * DPPPerPlane, SwathHeightY/C, HTotal, VTotal, VActive, VRatio, Interlace,
 * ReturnBW, DCFCLK, SRExitTime) have no declaration among the lines visible
 * here, and several "else"/closing-brace lines are likewise not visible;
 * comments below that depend on them are marked "presumably".
 */
6529 static void CalculateStutterEfficiency(
6530 struct display_mode_lib *mode_lib,
6531 int CompressedBufferSizeInkByte,
6532 bool UnboundedRequestEnabled,
6533 int ConfigReturnBufferSizeInKByte,
6534 int MetaFIFOSizeInKEntries,
6535 int ZeroSizeBufferEntries,
6536 int NumberOfActivePlanes,
6537 int ROBBufferSizeInKByte,
6538 double TotalDataReadBandwidth,
6541 double COMPBUF_RESERVED_SPACE_64B,
6542 double COMPBUF_RESERVED_SPACE_ZS,
6544 double SRExitZ8Time,
6545 bool SynchronizedVBlank,
6546 double Z8StutterEnterPlusExitWatermark,
6547 double StutterEnterPlusExitWatermark,
6548 bool ProgressiveToInterlaceUnitInOPP,
6550 double MinTTUVBlank[],
6552 unsigned int DETBufferSizeY[],
6553 int BytePerPixelY[],
6554 double BytePerPixelDETY[],
6555 double SwathWidthY[],
6558 double NetDCCRateLuma[],
6559 double NetDCCRateChroma[],
6560 double DCCFractionOfZeroSizeRequestsLuma[],
6561 double DCCFractionOfZeroSizeRequestsChroma[],
6564 double PixelClock[],
6566 enum scan_direction_class SourceScan[],
6567 int BlockHeight256BytesY[],
6568 int BlockWidth256BytesY[],
6569 int BlockHeight256BytesC[],
6570 int BlockWidth256BytesC[],
6571 int DCCYMaxUncompressedBlock[],
6572 int DCCCMaxUncompressedBlock[],
6575 bool WritebackEnable[],
6576 double ReadBandwidthPlaneLuma[],
6577 double ReadBandwidthPlaneChroma[],
6578 double meta_row_bw[],
6579 double dpte_row_bw[],
6580 double *StutterEfficiencyNotIncludingVBlank,
6581 double *StutterEfficiency,
6582 int *NumberOfStutterBurstsPerFrame,
6583 double *Z8StutterEfficiencyNotIncludingVBlank,
6584 double *Z8StutterEfficiency,
6585 int *Z8NumberOfStutterBurstsPerFrame,
6586 double *StutterPeriod)
6588 struct vba_vars_st *v = &mode_lib->vba;
6590 double DETBufferingTimeY;
6591 double SwathWidthYCriticalPlane = 0;
6592 double VActiveTimeCriticalPlane = 0;
6593 double FrameTimeCriticalPlane = 0;
6594 int BytePerPixelYCriticalPlane = 0;
6595 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6596 double MinTTUVBlankCriticalPlane = 0;
6597 double TotalCompressedReadBandwidth;
6598 double TotalRowReadBandwidth;
6599 double AverageDCCCompressionRate;
6600 double EffectiveCompressedBufferSize;
6601 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6602 double StutterBurstTime;
6603 int TotalActiveWriteback;
6605 double LinesInDETYRoundedDownToSwath;
6606 double MaximumEffectiveCompressionLuma;
6607 double MaximumEffectiveCompressionChroma;
6608 double TotalZeroSizeRequestReadBandwidth;
6609 double TotalZeroSizeCompressedReadBandwidth;
6610 double AverageDCCZeroSizeFraction;
6611 double AverageZeroSizeCompressionRate;
6612 int TotalNumberOfActiveOTG = 0;
6613 double LastStutterPeriod = 0.0;
6614 double LastZ8StutterPeriod = 0.0;
// Step 1: accumulate the bandwidth totals over all active planes.
6617 TotalZeroSizeRequestReadBandwidth = 0;
6618 TotalZeroSizeCompressedReadBandwidth = 0;
6619 TotalRowReadBandwidth = 0;
6620 TotalCompressedReadBandwidth = 0;
6622 for (k = 0; k < NumberOfActivePlanes; ++k) {
6623 if (DCCEnable[k] == true) {
// Luma compression cap: set to 2 when the 256-byte block dimension that lies
// along the swath height (width for dm_vert, height otherwise) exceeds
// SwathHeightY, or when DCCYMaxUncompressedBlock < 256. The assignment of 4
// is presumably the else arm (its "else" line is not visible here).
6624 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6625 || DCCYMaxUncompressedBlock[k] < 256) {
6626 MaximumEffectiveCompressionLuma = 2;
6628 MaximumEffectiveCompressionLuma = 4;
// Luma bandwidth is scaled down by min(net DCC rate, cap); the zero-size
// request share is tracked both uncompressed and divided by the cap.
6630 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6631 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6632 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6633 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
// Same accounting for chroma, only when the plane has chroma bandwidth.
6634 if (ReadBandwidthPlaneChroma[k] > 0) {
6635 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6636 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6637 MaximumEffectiveCompressionChroma = 2;
6639 MaximumEffectiveCompressionChroma = 4;
6641 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6642 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6643 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6644 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6645 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
// Presumably the DCC-disabled arm: luma and chroma bandwidth are added
// uncompressed -- TODO confirm, the branch line is not visible here.
6648 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
// Row (meta + dpte) bandwidth is counted once per DPP of the plane.
6650 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
// Step 2: averages over all planes.
// NOTE(review): no guard against a zero divisor is visible for either division.
6653 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6654 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
// NOTE(review): MaximumEffectiveCompressionLuma/Chroma are only assigned
// inside the DCC-enabled path above, so these debug prints may read them
// unassigned -- confirm.
6656 #ifdef __DML_VBA_DEBUG__
6657 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6658 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6659 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6660 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6661 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6662 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6663 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6664 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
// EffectiveCompressedBufferSize has three formulas, selected by
// AverageDCCZeroSizeFraction: exactly 1, greater than 0, and (presumably the
// final else) the remaining case.
6667 if (AverageDCCZeroSizeFraction == 1) {
6668 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6669 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6670 } else if (AverageDCCZeroSizeFraction > 0) {
6671 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6672 EffectiveCompressedBufferSize = dml_min(
6673 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6674 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6675 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6676 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
// The printed "min 3" and "min 4" values leave out the COMPBUF_RESERVED_SPACE_*
// terms that the computation above subtracts.
6677 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6679 "DML::%s: min 2 = %f\n",
6681 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6682 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6683 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6685 EffectiveCompressedBufferSize = dml_min(
6686 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6687 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6688 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6689 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
// NOTE(review): AverageZeroSizeCompressionRate is assigned only in the first
// two cases above, so this debug print may read it unassigned in the third.
6692 #ifdef __DML_VBA_DEBUG__
6693 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6694 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6695 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
// Step 3: per plane, compute how long the luma DET contents last and keep the
// plane with the smallest time as the critical plane.
6699 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Lines held for this plane: the luma DET size plus, only when
// UnboundedRequestEnabled, a share of EffectiveCompressedBufferSize weighted
// by this plane's luma bandwidth over the total, converted to lines via
// bytes-per-pixel and swath width.
6700 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6701 / BytePerPixelDETY[k] / SwathWidthY[k];
// presumably dml_floor(x, m) rounds down to a multiple of m (here: whole swaths) -- defined elsewhere
6702 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
// Lines -> time: line time is HTotal / PixelClock, scaled by 1 / VRatio.
6703 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
// NOTE(review): DETBufferSizeY is declared unsigned int[] in the signature but
// is passed to a "%f" conversion below; if dml_print is printf-like this is a
// format/argument mismatch in the debug build -- confirm.
6704 #ifdef __DML_VBA_DEBUG__
6705 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6706 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6707 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6708 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6709 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6710 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6711 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6712 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6713 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6714 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6715 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6716 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
// First plane, or a plane with a shorter buffering time: it becomes the
// critical plane and its time becomes *StutterPeriod.
6719 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
// For interlaced timing not handled by the OPP unit, VTotal and VActive are
// halved (and floored) before being converted to time.
6720 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6722 *StutterPeriod = DETBufferingTimeY;
6723 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6724 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6725 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6726 SwathWidthYCriticalPlane = SwathWidthY[k];
// Lines still missing to complete the partially buffered swath.
6727 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6728 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6730 #ifdef __DML_VBA_DEBUG__
6731 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6732 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6733 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6734 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6735 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6736 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6737 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
// Step 4: the data consumed during one stutter period, capped at
// EffectiveCompressedBufferSize.
6742 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6743 #ifdef __DML_VBA_DEBUG__
6744 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6745 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6746 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6747 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6748 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6749 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6750 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6751 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6752 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6753 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
// Burst time is the sum of three terms (printed as Part 1..3 below):
//   1. the capped part, divided by the average compression rate and ReturnBW
//   2. the remainder of the period's data, divided by DCFCLK * 64
//   3. the period's row data, divided by ReturnBW
6756 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6757 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6758 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6759 #ifdef __DML_VBA_DEBUG__
6760 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6761 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6762 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6763 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6764 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
// The burst can never be shorter than the time needed to fetch the rest of
// the critical plane's current swath at ReturnBW.
6766 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6769 "DML::%s: Time to finish residue swath=%f\n",
6771 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
// Count planes with writeback enabled.
6773 TotalActiveWriteback = 0;
6774 for (k = 0; k < NumberOfActivePlanes; ++k) {
6775 if (WritebackEnable[k]) {
6776 TotalActiveWriteback = TotalActiveWriteback + 1;
// Efficiencies are computed only when no writeback is active; the block of
// zero assignments further down is presumably the else arm of this test.
6780 if (TotalActiveWriteback == 0) {
6781 #ifdef __DML_VBA_DEBUG__
6782 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6783 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6784 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6785 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
// Fraction of the period not spent exiting or bursting, clamped at 0, in percent.
6787 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6788 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6789 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6790 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6792 *StutterEfficiencyNotIncludingVBlank = 0.;
6793 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6794 *NumberOfStutterBurstsPerFrame = 0;
6795 *Z8NumberOfStutterBurstsPerFrame = 0;
6797 #ifdef __DML_VBA_DEBUG__
6798 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6799 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6800 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6801 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6802 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
// Count planes k whose v->BlendingAndTiming[k] refers to themselves.
6805 for (k = 0; k < NumberOfActivePlanes; ++k) {
6806 if (v->BlendingAndTiming[k] == k) {
6807 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
// Efficiency over the whole frame: used when vblanks are synchronized or a
// single OTG is counted, and the last stutter period plus the critical
// plane's MinTTUVBlank exceeds the enter-plus-exit watermark. The plain
// assignments that follow are presumably the else arms of these two tests.
6811 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6812 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6814 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6815 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6816 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6818 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6821 *StutterEfficiency = 0;
// Same computation for Z8.
// NOTE(review): this path reads *NumberOfStutterBurstsPerFrame rather than
// *Z8NumberOfStutterBurstsPerFrame -- confirm this is intentional.
6824 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6825 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6826 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6827 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6828 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6830 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6833 *Z8StutterEfficiency = 0.;
6836 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6837 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6838 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6839 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6840 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6841 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6842 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6843 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration() - for every active plane choose the
 * luma/chroma swath heights, split the DET buffer between luma and chroma,
 * and report whether the viewport can be supported.
 *
 * Written by this function (as far as the visible lines show):
 *   SwathHeightY[k], SwathHeightC[k]     - chosen swath heights
 *   DETBufferSizeY[k], DETBufferSizeC[k] - DET bytes given to luma / chroma
 *   ViewportSizeSupportPerPlane[k]       - per-plane verdict
 *   *ViewportSizeSupport                 - starts true, cleared when any
 *                                          plane fails
 *   swath_width_luma_ub[], swath_width_chroma_ub[], SwathWidth[],
 *   SwathWidthChroma[] are handed to CalculateSwathWidth() -- presumably as
 *   outputs; part of that call's argument list is not visible here.
 *
 * NOTE(review): k, SwathHeightY and SwathHeightC are used below but their
 * declarations are not among the visible lines; several "else" lines are
 * likewise not visible, so branch descriptions are marked "presumably".
 */
6846 static void CalculateSwathAndDETConfiguration(
6847 bool ForceSingleDPP,
6848 int NumberOfActivePlanes,
6849 unsigned int DETBufferSizeInKByte,
6850 double MaximumSwathWidthLuma[],
6851 double MaximumSwathWidthChroma[],
6852 enum scan_direction_class SourceScan[],
6853 enum source_format_class SourcePixelFormat[],
6854 enum dm_swizzle_mode SurfaceTiling[],
6855 int ViewportWidth[],
6856 int ViewportHeight[],
6857 int SurfaceWidthY[],
6858 int SurfaceWidthC[],
6859 int SurfaceHeightY[],
6860 int SurfaceHeightC[],
6861 int Read256BytesBlockHeightY[],
6862 int Read256BytesBlockHeightC[],
6863 int Read256BytesBlockWidthY[],
6864 int Read256BytesBlockWidthC[],
6865 enum odm_combine_mode ODMCombineEnabled[],
6866 int BlendingAndTiming[],
6869 double BytePerPixDETY[],
6870 double BytePerPixDETC[],
6873 double HRatioChroma[],
6875 int swath_width_luma_ub[],
6876 int swath_width_chroma_ub[],
6877 double SwathWidth[],
6878 double SwathWidthChroma[],
6881 unsigned int DETBufferSizeY[],
6882 unsigned int DETBufferSizeC[],
6883 bool ViewportSizeSupportPerPlane[],
6884 bool *ViewportSizeSupport)
6886 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6887 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6888 int MinimumSwathHeightY;
6889 int MinimumSwathHeightC;
6890 int RoundedUpMaxSwathSizeBytesY;
6891 int RoundedUpMaxSwathSizeBytesC;
6892 int RoundedUpMinSwathSizeBytesY;
6893 int RoundedUpMinSwathSizeBytesC;
6894 int RoundedUpSwathSizeBytesY;
6895 int RoundedUpSwathSizeBytesC;
6896 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6897 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
// The local MaximumSwathHeightY/C and SwathWidthSingleDPP* arrays are
// uninitialized here and read later, so this call is what fills them in.
6900 CalculateSwathWidth(
6902 NumberOfActivePlanes,
6914 Read256BytesBlockHeightY,
6915 Read256BytesBlockHeightC,
6916 Read256BytesBlockWidthY,
6917 Read256BytesBlockWidthC,
6922 SwathWidthSingleDPP,
6923 SwathWidthSingleDPPChroma,
6926 MaximumSwathHeightY,
6927 MaximumSwathHeightC,
6928 swath_width_luma_ub,
6929 swath_width_chroma_ub);
6931 *ViewportSizeSupport = true;
6932 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Minimum swath heights, first group of formats (444_64/32/16, mono_16/8,
// rgbe): luma minimum equals the maximum for linear tiling or for 444_64 on
// 64kb_s* tiling when not scanning vertically; the "/ 2" assignment is
// presumably the final else. Chroma minimum equals the chroma maximum.
6933 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6934 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6935 if (SurfaceTiling[k] == dm_sw_linear
6936 || (SourcePixelFormat[k] == dm_444_64
6937 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6938 && SourceScan[k] != dm_vert)) {
6939 MinimumSwathHeightY = MaximumSwathHeightY[k];
// NOTE(review): dm_444_8 is not among the formats accepted by the enclosing
// format test above, so this arm looks unreachable -- confirm.
6940 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6941 MinimumSwathHeightY = MaximumSwathHeightY[k];
6943 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6945 MinimumSwathHeightC = MaximumSwathHeightC[k];
// Remaining formats (presumably the else arm of the format test): minimums
// equal the maximums for linear tiling; otherwise luma and/or chroma are
// halved depending on rgbe_alpha / 420_8 and the scan direction.
6947 if (SurfaceTiling[k] == dm_sw_linear) {
6948 MinimumSwathHeightY = MaximumSwathHeightY[k];
6949 MinimumSwathHeightC = MaximumSwathHeightC[k];
6950 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6951 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6952 MinimumSwathHeightC = MaximumSwathHeightC[k];
6953 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6954 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6955 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6956 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6957 MinimumSwathHeightY = MaximumSwathHeightY[k];
6958 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6960 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6961 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
// Swath sizes in bytes for the maximum and minimum heights
// (width_ub * bytes-per-pixel * height, truncated to int); for dm_420_10 the
// sizes are additionally passed through dml_ceil(..., 256).
6965 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6966 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6967 if (SourcePixelFormat[k] == dm_420_10) {
6968 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6969 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6971 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6972 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6973 if (SourcePixelFormat[k] == dm_420_10) {
6974 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6975 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
// Pick swath heights whose combined size fits in half of the DET
// (DETBufferSizeInKByte * 1024 / 2), trying in order:
//   max luma + max chroma;
//   min luma + max chroma (when luma >= 1.5 x chroma);
//   max luma + min chroma (when luma <  1.5 x chroma);
//   min luma + min chroma (presumably the final else).
6978 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6979 SwathHeightY[k] = MaximumSwathHeightY[k];
6980 SwathHeightC[k] = MaximumSwathHeightC[k];
6981 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6982 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6983 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6984 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6985 SwathHeightY[k] = MinimumSwathHeightY;
6986 SwathHeightC[k] = MaximumSwathHeightC[k];
6987 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6988 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6989 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6990 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6991 SwathHeightY[k] = MaximumSwathHeightY[k];
6992 SwathHeightC[k] = MinimumSwathHeightC;
6993 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6994 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6996 SwathHeightY[k] = MinimumSwathHeightY;
6997 SwathHeightC[k] = MinimumSwathHeightC;
6998 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6999 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
// DET split: all to luma when there is no chroma swath; half/half when the
// luma swath is at most 1.5 x the chroma swath; the 2/3 : 1/3 split below is
// presumably the final else. The split uses the DET size after
// dml_ceil(..., 64) (presumably: rounded up to a multiple of 64 KB), whereas
// the fit tests above use the unrounded DETBufferSizeInKByte.
7002 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7003 if (SwathHeightC[k] == 0) {
7004 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
7005 DETBufferSizeC[k] = 0;
7006 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
7007 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
7008 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
7010 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
7011 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
// The plane is unsupported when even the minimum swaths exceed half of the
// rounded DET, or when a swath width exceeds its maximum (chroma is checked
// only when SwathHeightC > 0). The "true" assignment is presumably the else.
7014 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
7015 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
7016 *ViewportSizeSupport = false;
7017 ViewportSizeSupportPerPlane[k] = false;
7019 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth() - per active plane, compute the swath widths and the
 * maximum swath heights used by the swath/DET sizing code.
 *
 * Arrays written for each plane k (as far as the visible lines show):
 *   SwathWidthSingleDPPY[k], SwathWidthSingleDPPC[k] - width for one DPP
 *   SwathWidthY[k], SwathWidthC[k]                   - width after ODM / DPP split
 *   MaximumSwathHeightY[k], MaximumSwathHeightC[k]   - 256-byte block height, or
 *                                                      block width for the other
 *                                                      scan arm
 *   swath_width_luma_ub[k], swath_width_chroma_ub[k] - block-aligned upper bounds,
 *                                                      capped by the block-aligned
 *                                                      surface size
 *
 * NOTE(review): k, j, HActive, HRatio, DPPPerPlane and BytePerPixC are used
 * below without a declaration among the visible lines, and several "else"
 * lines are not visible; dependent comments are marked "presumably".
 */
7025 static void CalculateSwathWidth(
7026 bool ForceSingleDPP,
7027 int NumberOfActivePlanes,
7028 enum source_format_class SourcePixelFormat[],
7029 enum scan_direction_class SourceScan[],
7030 int ViewportWidth[],
7031 int ViewportHeight[],
7032 int SurfaceWidthY[],
7033 int SurfaceWidthC[],
7034 int SurfaceHeightY[],
7035 int SurfaceHeightC[],
7036 enum odm_combine_mode ODMCombineEnabled[],
7039 int Read256BytesBlockHeightY[],
7040 int Read256BytesBlockHeightC[],
7041 int Read256BytesBlockWidthY[],
7042 int Read256BytesBlockWidthC[],
7043 int BlendingAndTiming[],
7047 double SwathWidthSingleDPPY[],
7048 double SwathWidthSingleDPPC[],
7049 double SwathWidthY[],
7050 double SwathWidthC[],
7051 int MaximumSwathHeightY[],
7052 int MaximumSwathHeightC[],
7053 int swath_width_luma_ub[],
7054 int swath_width_chroma_ub[])
7056 enum odm_combine_mode MainPlaneODMCombine;
7059 #ifdef __DML_VBA_DEBUG__
7060 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
7063 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Single-DPP luma width: the viewport width when not scanning vertically;
// the viewport-height assignment is presumably the else (vertical) arm.
7064 if (SourceScan[k] != dm_vert) {
7065 SwathWidthSingleDPPY[k] = ViewportWidth[k];
7067 SwathWidthSingleDPPY[k] = ViewportHeight[k];
7070 #ifdef __DML_VBA_DEBUG__
7071 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
7072 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Use the ODM combine mode of the plane that BlendingAndTiming[k] points at;
// the plane's own mode is the starting value if no index matches.
7075 MainPlaneODMCombine = ODMCombineEnabled[k];
7076 for (j = 0; j < NumberOfActivePlanes; ++j) {
7077 if (BlendingAndTiming[k] == j) {
7078 MainPlaneODMCombine = ODMCombineEnabled[j];
// Luma swath width: with 4:1 / 2:1 ODM combine it is capped at
// round(HActive / 4 or 2 * HRatio); with two DPPs per plane it is halved;
// the plain copy is presumably the final else.
7082 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7083 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7084 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7085 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7086 } else if (DPPPerPlane[k] == 2) {
7087 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7089 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7092 #ifdef __DML_VBA_DEBUG__
7093 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7094 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
// Chroma widths are half the luma widths for the 4:2:0 formats; the equal
// assignments are presumably the else arm for all other formats.
7097 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7098 SwathWidthC[k] = SwathWidthY[k] / 2;
7099 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7101 SwathWidthC[k] = SwathWidthY[k];
7102 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
// ForceSingleDPP overrides the split widths with the single-DPP widths.
7105 if (ForceSingleDPP == true) {
7106 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7107 SwathWidthC[k] = SwathWidthSingleDPPC[k];
// Surface dimensions passed through dml_ceil with the 256-byte block
// dimension (presumably: rounded up to a whole number of blocks).
7110 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7111 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7112 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7113 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7115 #ifdef __DML_VBA_DEBUG__
7116 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
// Non-vertical scan: max swath height is the block height, and the upper
// bound is ceil(width - 1, block width) + block width, capped by the
// surface upper bound. Chroma bound is 0 when BytePerPixC is not positive
// (presumably the else of the BytePerPixC test).
7119 if (SourceScan[k] != dm_vert) {
7120 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7121 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7122 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7123 if (BytePerPixC[k] > 0) {
7124 swath_width_chroma_ub[k] = dml_min(
7126 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7128 swath_width_chroma_ub[k] = 0;
// Presumably the vertical-scan arm: block width and height swap roles.
7131 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7132 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7133 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7134 if (BytePerPixC[k] > 0) {
7135 swath_width_chroma_ub[k] = dml_min(
7136 surface_height_ub_c,
7137 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7139 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency() - return the extra latency made of a clock-cycle
 * part and a byte-transfer part:
 *
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *     + ExtraLatencyBytes / ReturnBW
 *
 * where ExtraLatencyBytes comes from CalculateExtraLatencyBytes().
 *
 * NOTE(review): DCFCLK and ReturnBW are used below but are not declared among
 * the visible lines, and part of the helper call's argument list is not
 * visible either; presumably they are parameters on lines not shown.
 */
7146 static double CalculateExtraLatency(
7147 int RoundTripPingLatencyCycles,
7148 int ReorderingBytes,
7150 int TotalNumberOfActiveDPP,
7151 int PixelChunkSizeInKByte,
7152 int TotalNumberOfDCCActiveDPP,
7157 int NumberOfActivePlanes,
7159 int dpte_group_bytes[],
7160 double HostVMInefficiencyFactor,
7161 double HostVMMinPageSize,
7162 int HostVMMaxNonCachedPageTableLevels)
7164 double ExtraLatencyBytes;
7165 double ExtraLatency;
// Byte count that has to be returned, computed by the helper.
7167 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7169 TotalNumberOfActiveDPP,
7170 PixelChunkSizeInKByte,
7171 TotalNumberOfDCCActiveDPP,
7175 NumberOfActivePlanes,
7178 HostVMInefficiencyFactor,
7180 HostVMMaxNonCachedPageTableLevels);
// Cycle part converted with DCFCLK, byte part converted with ReturnBW.
7182 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7184 #ifdef __DML_VBA_DEBUG__
7185 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7186 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7187 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7188 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7189 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7192 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes() - compute a byte count in `ret` from:
 *   - ReorderingBytes,
 *   - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024,
 *   - and, when GPUVMEnable is set, for each plane
 *     NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *       * HostVMInefficiencyFactor.
 *
 * NOTE(review): GPUVMEnable, HostVMEnable, MetaChunkSize, NumberOfDPP and ret
 * are used below without a declaration among the visible lines, and the
 * return statement is not visible; presumably `ret` is what gets returned.
 */
7195 static double CalculateExtraLatencyBytes(
7196 int ReorderingBytes,
7197 int TotalNumberOfActiveDPP,
7198 int PixelChunkSizeInKByte,
7199 int TotalNumberOfDCCActiveDPP,
7203 int NumberOfActivePlanes,
7205 int dpte_group_bytes[],
7206 double HostVMInefficiencyFactor,
7207 double HostVMMinPageSize,
7208 int HostVMMaxNonCachedPageTableLevels)
7211 int HostVMDynamicLevels = 0, k;
// HostVMDynamicLevels depends on HostVMMinPageSize: below 2048 it is the full
// HostVMMaxNonCachedPageTableLevels; from 2048 up to (not including) 1048576
// it is one level fewer; the "- 2" assignment is presumably the else arm for
// larger values. Both reductions are clamped at 0. The final "= 0" is
// presumably the else arm for GPUVM or HostVM being disabled.
7213 if (GPUVMEnable == true && HostVMEnable == true) {
7214 if (HostVMMinPageSize < 2048) {
7215 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
7216 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
7217 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
7219 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
7222 HostVMDynamicLevels = 0;
// Base amount: reordering bytes plus pixel and meta chunks (the chunk term is
// multiplied by 1024.0).
7225 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
// With GPUVM enabled, add the per-plane dpte group bytes, scaled by the
// dynamic-level factor and the inefficiency factor.
7227 if (GPUVMEnable == true) {
7228 for (k = 0; k < NumberOfActivePlanes; ++k) {
7229 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - start from the largest of the three urgent latency
 * inputs (pixel data only, pixel mixed with VM data, VM data only).
 *
 * When DoUrgentLatencyAdjustment is true, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * so the adjustment is positive when FabricClock is below the reference and
 * negative when it is above.
 *
 * NOTE(review): ret is presumably the value returned - confirm against the
 * full definition.
 */
7235 static double CalculateUrgentLatency(
7236 double UrgentLatencyPixelDataOnly,
7237 double UrgentLatencyPixelMixedWithVMData,
7238 double UrgentLatencyVMDataOnly,
7239 bool DoUrgentLatencyAdjustment,
7240 double UrgentLatencyAdjustmentFabricClockComponent,
7241 double UrgentLatencyAdjustmentFabricClockReference,
// Worst case of the three latency figures.
7246 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
7247 if (DoUrgentLatencyAdjustment == true) {
// Scale the adjustment by how far FabricClock is from the reference clock.
7248 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - compute DCFCLKState[i][j] for every voltage state i
 * (0 .. v->soc.num_states - 1) and for j = 0 and j = 1.
 *
 * For each (i, j) the function derives two requirements:
 *   - DCFCLKRequiredForAverageBandwidth: the largest of the projected deep
 *     sleep DCFCLK and two bandwidth / ReturnBusWidth expressions;
 *   - DCFCLKRequiredForPeakBandwidth: the sum of per-plane prefetch
 *     requirements, then raised further (or forced to DCFCLKPerState[i]) by a
 *     per-plane timing check.
 * The stored result is
 *   min(DCFCLKPerState[i], 1.05 * max(average requirement, peak requirement)).
 *
 * Whenever a plane has no positive time budget, the code falls back to
 * DCFCLKPerState[i] for that requirement.
 */
7253 static void UseMinimumDCFCLK(
7254 struct display_mode_lib *mode_lib,
7255 int MaxInterDCNTileRepeaters,
7256 int MaxPrefetchMode,
7257 double FinalDRAMClockChangeLatency,
7258 double SREnterPlusExitTime,
7260 int RoundTripPingLatencyCycles,
7261 int ReorderingBytes,
7262 int PixelChunkSizeInKByte,
7265 int GPUVMMaxPageTableLevels,
7267 int NumberOfActivePlanes,
7268 double HostVMMinPageSize,
7269 int HostVMMaxNonCachedPageTableLevels,
7270 bool DynamicMetadataVMEnabled,
7271 enum immediate_flip_requirement ImmediateFlipRequirement,
7272 bool ProgressiveToInterlaceUnitInOPP,
7273 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
7274 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
7277 int DynamicMetadataTransmittedBytes[],
7278 int DynamicMetadataLinesBeforeActiveRequired[],
7280 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
7281 double RequiredDISPCLK[][2],
7282 double UrgLatency[],
7283 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
7284 double ProjectedDCFCLKDeepSleep[][2],
7285 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
7286 double TotalVActivePixelBandwidth[][2],
7287 double TotalVActiveCursorBandwidth[][2],
7288 double TotalMetaRowBandwidth[][2],
7289 double TotalDPTERowBandwidth[][2],
7290 unsigned int TotalNumberOfActiveDPP[][2],
7291 unsigned int TotalNumberOfDCCActiveDPP[][2],
7292 int dpte_group_bytes[],
7293 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
7294 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
7295 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
7296 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
7297 int BytePerPixelY[],
7298 int BytePerPixelC[],
7300 double PixelClock[],
7301 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
7302 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
7303 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
7304 bool DynamicMetadataEnable[],
7305 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
7306 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
7307 double ReadBandwidthLuma[],
7308 double ReadBandwidthChroma[],
7309 double DCFCLKPerState[],
7310 double DCFCLKState[][2])
7312 struct vba_vars_st *v = &mode_lib->vba;
7313 int dummy1, i, j, k;
7314 double NormalEfficiency, dummy2, dummy3;
7315 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
// Convert the percentage into a 0..1 style factor used as a divisor below.
7317 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
// Outer loops: every voltage state i, and both values of the second index j.
7318 for (i = 0; i < v->soc.num_states; ++i) {
7319 for (j = 0; j <= 1; ++j) {
7320 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7321 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7322 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7323 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7324 double MinimumTWait;
7325 double NonDPTEBandwidth;
7326 double DPTEBandwidth;
7327 double DCFCLKRequiredForAverageBandwidth;
7328 double ExtraLatencyBytes;
7329 double ExtraLatencyCycles;
7330 double DCFCLKRequiredForPeakBandwidth;
7331 int NoOfDPPState[DC__NUM_DPP__MAX];
7332 double MinimumTvmPlus2Tr0;
// Sum, over all planes, the DPTE row bytes divided by 15.75 line times (HTotal / PixelClock).
7334 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7335 for (k = 0; k < NumberOfActivePlanes; ++k) {
7336 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7337 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
// Local int copy of the per-plane DPP counts for this (i, j).
7340 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7341 NoOfDPPState[k] = NoOfDPP[i][j][k];
7344 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
7345 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
// With host VM enabled or immediate flip required, use the prefetch/flip DPTE total computed above
// instead of the caller-supplied TotalDPTERowBandwidth.
7346 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
7347 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
// Average-bandwidth requirement: the largest of three candidates.
7348 DCFCLKRequiredForAverageBandwidth = dml_max3(
7349 ProjectedDCFCLKDeepSleep[i][j],
7350 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
7351 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7352 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
7354 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7356 TotalNumberOfActiveDPP[i][j],
7357 PixelChunkSizeInKByte,
7358 TotalNumberOfDCCActiveDPP[i][j],
7362 NumberOfActivePlanes,
7367 HostVMMaxNonCachedPageTableLevels);
// Extra latency as a cycle count: fixed cycle terms plus the extra bytes divided by efficiency and bus width.
7368 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
// Per-plane peak (prefetch) requirement.
7369 for (k = 0; k < NumberOfActivePlanes; ++k) {
7370 double DCFCLKCyclesRequiredInPrefetch;
7371 double ExpectedPrefetchBWAcceleration;
7372 double PrefetchTime;
// Cycles needed to return the luma + chroma prefetch pixel bytes.
7374 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
7375 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
// Total prefetch cycles: extra latency share, page table bytes (PDE/meta PTE only counted when
// GPUVMMaxPageTableLevels > 2), DPTE rows, meta rows, and the pixel cycles above.
7376 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7377 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7378 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
7379 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
// Time of the larger of the luma/chroma prefetch line counts, in line times.
7380 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
7381 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
7382 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
// Extra VM latency is only charged when GPUVM, this plane's dynamic metadata, and dynamic metadata VM are all enabled.
7383 DynamicMetadataVMExtraLatency[k] =
7384 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
7385 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
// Time available for prefetch: (MaximumVStartup - 1) line times minus MinimumTWait, minus a page-table-level
// scaled term, minus the dynamic metadata VM latency.
// NOTE(review): the multiplicand of the page-table-level factor is not visible in this view - confirm.
7386 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
7388 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
7389 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7390 - DynamicMetadataVMExtraLatency[k];
7392 if (PrefetchTime > 0) {
7393 double ExpectedVRatioPrefetch;
7394 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7395 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
// Ratios above 1 scale the requirement up; ratios above 4 scale it up a second time.
7396 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7397 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7398 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
7399 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7400 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
// Presumably the no-prefetch-time fallback: use this state's DCFCLKPerState value.
7403 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// Planes with dynamic metadata must also fit the extra latency into the time left before vstartup.
7405 if (DynamicMetadataEnable[k] == true) {
7410 double AllowedTimeForUrgentExtraLatency;
7412 CalculateVupdateAndDynamicMetadataParameters(
7413 MaxInterDCNTileRepeaters,
7414 RequiredDPPCLK[i][j][k],
7415 RequiredDISPCLK[i][j],
7416 ProjectedDCFCLKDeepSleep[i][j],
7419 VTotal[k] - VActive[k],
7420 DynamicMetadataTransmittedBytes[k],
7421 DynamicMetadataLinesBeforeActiveRequired[k],
7423 ProgressiveToInterlaceUnitInOPP,
7431 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7432 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7433 if (AllowedTimeForUrgentExtraLatency > 0) {
7434 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7435 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7436 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
// Presumably the no-time-left fallback: use this state's DCFCLKPerState value.
7438 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
// The peak requirement starts as the sum of all per-plane requirements.
7442 DCFCLKRequiredForPeakBandwidth = 0;
7443 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
7444 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
// Minimum Tvm + 2*Tr0 as a multiple of the urgent latency, depending on GPUVM / host VM settings.
7446 MinimumTvmPlus2Tr0 = UrgLatency[i]
7447 * (GPUVMEnable == true ?
7448 (HostVMEnable == true ?
7449 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
// Per-plane timing check: either force DCFCLKPerState[i] or raise the requirement so the
// extra latency (and pixel prefetch cycles) fit in the remaining time.
7451 for (k = 0; k < NumberOfActivePlanes; ++k) {
7452 double MaximumTvmPlus2Tr0PlusTsw;
7453 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7454 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7455 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
7457 DCFCLKRequiredForPeakBandwidth = dml_max3(
7458 DCFCLKRequiredForPeakBandwidth,
7459 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7460 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
// Final value: 5% margin over the larger requirement, capped at DCFCLKPerState[i].
7463 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize - write two outputs:
 *   *UnboundedRequestEnabled     - result of UnboundedRequest() for the given
 *                                  policy, TotalActiveDPP, NoChromaPlanes and
 *                                  Output[0] (only the first Output element is read);
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus the DET
 *                                  buffer size times a DPP count, then scaled by
 *                                  CompressedBufferSegmentSizeInkByteFinal / 64.
 *
 * The DPP count is TotalActiveDPP when unbounded requesting is enabled and
 * MaxNumDPP otherwise.
 */
7468 static void CalculateUnboundedRequestAndCompressedBufferSize(
7469 unsigned int DETBufferSizeInKByte,
7470 int ConfigReturnBufferSizeInKByte,
7471 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7473 bool NoChromaPlanes,
7475 int CompressedBufferSegmentSizeInkByteFinal,
7476 enum output_encoder_class *Output,
7477 bool *UnboundedRequestEnabled,
7478 int *CompressedBufferSizeInkByte)
// NOTE(review): dml_ceil(x, 64) presumably rounds x up to a multiple of 64 - confirm against its definition.
7480 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7482 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The subtraction is done in double (actDETBufferSizeInKByte) and converted to int on assignment.
7483 *CompressedBufferSizeInkByte = (
7484 *UnboundedRequestEnabled == true ?
7485 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7486 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Integer arithmetic: multiply first, then divide by 64 (truncating).
7487 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only dump of the inputs and the two results.
7489 #ifdef __DML_VBA_DEBUG__
7490 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7491 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7492 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7493 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7494 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7495 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7496 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7500 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7502 bool ret_val = false;
7504 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7505 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {