Merge tag 'amd-drm-fixes-6.0-2022-08-17' of https://gitlab.freedesktop.org/agd5f...
author Dave Airlie <airlied@redhat.com>
Thu, 18 Aug 2022 23:45:21 +0000 (09:45 +1000)
committer Dave Airlie <airlied@redhat.com>
Thu, 18 Aug 2022 23:45:22 +0000 (09:45 +1000)
amd-drm-fixes-6.0-2022-08-17:

amdgpu:
- Revert some DML stack changes
- Rounding fixes in KFD allocations
- atombios vram info table parsing fix
- DCN 3.1.4 fixes
- Clockgating fixes for various new IPs
- SMU 13.0.4 fixes
- DCN 3.1.4 FP fixes
- TMDS fixes for YCbCr420 4k modes
- DCN 3.2.x fixes
- USB 4 fixes
- SMU 13.0 fixes
- SMU driver unload memory leak fixes
- Display orientation fix
- Regression fix for generic fbdev conversion
- SDMA 6.x fixes
- SR-IOV fixes
- IH 6.x fixes
- Use after free fix in bo list handling
- Revert pipe1 support
- XGMI hive reset fix

amdkfd:
- Fix potential crash in kfd_create_indirect_link_prop()

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220818025206.6463-1-alexander.deucher@amd.com
108 files changed:
drivers/gpu/drm/amd/amdgpu/aldebaran.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
drivers/gpu/drm/amd/amdgpu/navi10_ih.c
drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdgpu/soc21.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
drivers/gpu/drm/amd/amdgpu/vega10_ih.c
drivers/gpu/drm/amd/amdgpu/vega20_ih.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_events.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
drivers/gpu/drm/amd/display/dc/basics/conversion.c
drivers/gpu/drm/amd/display/dc/basics/conversion.h
drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
drivers/gpu/drm/amd/display/dc/dc_link.h
drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
drivers/gpu/drm/amd/display/dc/dcn314/Makefile
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
drivers/gpu/drm/amd/display/dc/dml/Makefile
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c [new file with mode: 0644]
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h [new file with mode: 0644]
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
drivers/gpu/drm/amd/display/include/dal_asic_id.h
drivers/gpu/drm/amd/display/include/logger_types.h
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

index c6cc493a548665e60f900cfc5d060283684fb389..2b97b8a96fb4944963093dc9ff6f569399af12d4 100644 (file)
@@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
                              struct amdgpu_reset_context *reset_context)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+       struct list_head *reset_device_list = reset_context->reset_device_list;
        struct amdgpu_device *tmp_adev = NULL;
-       struct list_head reset_device_list;
        int r = 0;
 
        dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+       if (reset_device_list == NULL)
+               return -EINVAL;
+
        if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
            reset_context->hive == NULL) {
                /* Wrong context, return error */
                return -EINVAL;
        }
 
-       INIT_LIST_HEAD(&reset_device_list);
-       if (reset_context->hive) {
-               list_for_each_entry (tmp_adev,
-                                    &reset_context->hive->device_list,
-                                    gmc.xgmi.head)
-                       list_add_tail(&tmp_adev->reset_list,
-                                     &reset_device_list);
-       } else {
-               list_add_tail(&reset_context->reset_req_dev->reset_list,
-                             &reset_device_list);
-       }
-
-       list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+       list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
                mutex_lock(&tmp_adev->reset_cntl->reset_lock);
                tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
        }
@@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
         * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
         * them together so that they can be completed asynchronously on multiple nodes
         */
-       list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+       list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
                /* For XGMI run all resets in parallel to speed up the process */
                if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                        if (!queue_work(system_unbound_wq,
@@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
 
        /* For XGMI wait for all resets to complete before proceed */
        if (!r) {
-               list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+               list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
                        if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                                flush_work(&tmp_adev->reset_cntl->reset_work);
                                r = tmp_adev->asic_reset_res;
@@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
                }
        }
 
-       list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+       list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
                mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
                tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
        }
@@ -339,10 +331,13 @@ static int
 aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
                                  struct amdgpu_reset_context *reset_context)
 {
+       struct list_head *reset_device_list = reset_context->reset_device_list;
        struct amdgpu_device *tmp_adev = NULL;
-       struct list_head reset_device_list;
        int r;
 
+       if (reset_device_list == NULL)
+               return -EINVAL;
+
        if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
                    IP_VERSION(13, 0, 2) &&
            reset_context->hive == NULL) {
@@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
                return -EINVAL;
        }
 
-       INIT_LIST_HEAD(&reset_device_list);
-       if (reset_context->hive) {
-               list_for_each_entry (tmp_adev,
-                                    &reset_context->hive->device_list,
-                                    gmc.xgmi.head)
-                       list_add_tail(&tmp_adev->reset_list,
-                                     &reset_device_list);
-       } else {
-               list_add_tail(&reset_context->reset_req_dev->reset_list,
-                             &reset_device_list);
-       }
-
-       list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+       list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
                dev_info(tmp_adev->dev,
                         "GPU reset succeeded, trying to resume\n");
                r = aldebaran_mode2_restore_ip(tmp_adev);
index e146810c700ba7846c926ef205f6480418b3f415..d597e2656c475da6642e88156c8a9d6e056986da 100644 (file)
@@ -317,7 +317,7 @@ enum amdgpu_kiq_irq {
        AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
        AMDGPU_CP_KIQ_IRQ_LAST
 };
-
+#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
index 3c09dcc0986ee96ee8350378a671c4638cae2011..647220a8762dc591cbf83fba338235ca9798ac9e 100644 (file)
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence {
 struct amdgpu_kfd_dev {
        struct kfd_dev *dev;
        uint64_t vram_used;
+       uint64_t vram_used_aligned;
        bool init_complete;
        struct work_struct reset_work;
 };
index a699134a1e8cf5dd8c92fd0ddfe2f1a449eb3b7b..cbd593f7d553f71e0b7b1ba80bf98f9384bcf889 100644 (file)
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 
 /*
- * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
  * BO chunk
  */
-#define VRAM_ALLOCATION_ALIGN (1 << 21)
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
 
 /* Impose limit on how much memory KFD can use */
 static struct {
@@ -149,7 +149,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                 * to avoid fragmentation caused by 4K allocations in the tail
                 * 2M BO chunk.
                 */
-               vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
+               vram_needed = size;
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                system_mem_needed = size;
        } else if (!(alloc_flag &
@@ -182,8 +182,10 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
         */
        WARN_ONCE(vram_needed && !adev,
                  "adev reference can't be null when vram is used");
-       if (adev)
+       if (adev) {
                adev->kfd.vram_used += vram_needed;
+               adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+       }
        kfd_mem_limit.system_mem_used += system_mem_needed;
        kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 
@@ -203,8 +205,10 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                WARN_ONCE(!adev,
                          "adev reference can't be null when alloc mem flags vram is set");
-               if (adev)
-                       adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+               if (adev) {
+                       adev->kfd.vram_used -= size;
+                       adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+               }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= size;
        } else if (!(alloc_flag &
@@ -1608,15 +1612,14 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        size_t available;
-
        spin_lock(&kfd_mem_limit.mem_limit_lock);
        available = adev->gmc.real_vram_size
-               - adev->kfd.vram_used
+               - adev->kfd.vram_used_aligned
                - atomic64_read(&adev->vram_pin_size)
                - reserved_for_pt;
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 
-       return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+       return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
index fd8f3731758edec3e9e0c35f3d52d8768e067d92..b81b77a9efa6157bcf562454a47b11aa7c557634 100644 (file)
@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
                                        mem_channel_number = vram_info->v30.channel_num;
                                        mem_channel_width = vram_info->v30.channel_width;
                                        if (vram_width)
-                                               *vram_width = mem_channel_number * mem_channel_width;
+                                               *vram_width = mem_channel_number * (1 << mem_channel_width);
                                        break;
                                default:
                                        return -EINVAL;
index d8f1335bc68f416154b6ee3aae6f5a4028f08cbf..b7bae833c804b02b05478fc509fa6d2eb3807341 100644 (file)
@@ -837,16 +837,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                        continue;
 
                r = amdgpu_vm_bo_update(adev, bo_va, false);
-               if (r) {
-                       mutex_unlock(&p->bo_list->bo_list_mutex);
+               if (r)
                        return r;
-               }
 
                r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
-               if (r) {
-                       mutex_unlock(&p->bo_list->bo_list_mutex);
+               if (r)
                        return r;
-               }
        }
 
        r = amdgpu_vm_handle_moved(adev, vm);
index e2eec985adb3a4434ec27eb5d07589497281c126..cb00c7d6f50bec79ba9f2d2bc2c723d733d7dfd5 100644 (file)
@@ -1705,7 +1705,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
        char reg_offset[11];
-       uint32_t *new, *tmp = NULL;
+       uint32_t *new = NULL, *tmp = NULL;
        int ret, i = 0, len = 0;
 
        do {
@@ -1747,7 +1747,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
        ret = size;
 
 error_free:
-       kfree(tmp);
+       if (tmp != new)
+               kfree(tmp);
        kfree(new);
        return ret;
 }
index c4a6fe3070b6e6e34bae181abcfb6a86c421f171..e8a0b19b7398538411d42160315adde676c3ee55 100644 (file)
@@ -4742,6 +4742,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
        tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
                                    reset_list);
        amdgpu_reset_reg_dumps(tmp_adev);
+
+       reset_context->reset_device_list = device_list_handle;
        r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
        /* If reset handler not implemented, continue; otherwise return */
        if (r == -ENOSYS)
index 5071b96be9824629caec9a1a886a65fc15d5ffb2..b1099ee79c50b0bce57a031683dafa7912ff6532 100644 (file)
@@ -272,10 +272,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
        /* Signal all jobs not yet scheduled */
        for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
                struct drm_sched_rq *rq = &sched->sched_rq[i];
-
-               if (!rq)
-                       continue;
-
                spin_lock(&rq->lock);
                list_for_each_entry(s_entity, &rq->entities, list) {
                        while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
index 9e55a5d7a825334230d09190488585ee650b8a9e..ffda1560c6481d6476fe0ee081224f2ba879bc6c 100644 (file)
@@ -37,6 +37,7 @@ struct amdgpu_reset_context {
        struct amdgpu_device *reset_req_dev;
        struct amdgpu_job *job;
        struct amdgpu_hive_info *hive;
+       struct list_head *reset_device_list;
        unsigned long flags;
 };
 
index 3b4c19412625dd1395adf8f00bd3dbb85ec37940..134575a3893c535cdfd77bd5de2b902c05ba8254 100644 (file)
@@ -637,6 +637,8 @@ struct amdgpu_ttm_tt {
 #endif
 };
 
+#define ttm_to_amdgpu_ttm_tt(ptr)      container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
 #ifdef CONFIG_DRM_AMDGPU_USERPTR
 /*
  * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
@@ -648,7 +650,7 @@ struct amdgpu_ttm_tt {
 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 {
        struct ttm_tt *ttm = bo->tbo.ttm;
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        unsigned long start = gtt->userptr;
        struct vm_area_struct *vma;
        struct mm_struct *mm;
@@ -702,7 +704,7 @@ out_unlock:
  */
 bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        bool r = false;
 
        if (!gtt || !gtt->userptr)
@@ -751,7 +753,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
                                     struct ttm_tt *ttm)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
        enum dma_data_direction direction = write ?
                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -788,7 +790,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
                                        struct ttm_tt *ttm)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
        enum dma_data_direction direction = write ?
                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -822,7 +824,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
 {
        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
        struct ttm_tt *ttm = tbo->ttm;
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        if (amdgpu_bo_encrypted(abo))
                flags |= AMDGPU_PTE_TMZ;
@@ -860,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
                                   struct ttm_resource *bo_mem)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       struct amdgpu_ttm_tt *gtt = (void*)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        uint64_t flags;
        int r;
 
@@ -927,7 +929,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
        struct ttm_operation_ctx ctx = { false, false };
-       struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
        struct ttm_placement placement;
        struct ttm_place placements;
        struct ttm_resource *tmp;
@@ -998,7 +1000,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
                                      struct ttm_tt *ttm)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        /* if the pages have userptr pinning then clear that first */
        if (gtt->userptr) {
@@ -1025,7 +1027,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
 static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
                                       struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        if (gtt->usertask)
                put_task_struct(gtt->usertask);
@@ -1079,7 +1081,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
                                  struct ttm_operation_ctx *ctx)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        pgoff_t i;
        int ret;
 
@@ -1113,7 +1115,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
 static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
                                     struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        struct amdgpu_device *adev;
        pgoff_t i;
 
@@ -1182,7 +1184,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
        /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
        bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
 
-       gtt = (void *)bo->ttm;
+       gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
        gtt->userptr = addr;
        gtt->userflags = flags;
 
@@ -1199,7 +1201,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
  */
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        if (gtt == NULL)
                return NULL;
@@ -1218,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
                                  unsigned long end, unsigned long *userptr)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
        unsigned long size;
 
        if (gtt == NULL || !gtt->userptr)
@@ -1241,7 +1243,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
  */
 bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        if (gtt == NULL || !gtt->userptr)
                return false;
@@ -1254,7 +1256,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
  */
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
-       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
 
        if (gtt == NULL)
                return false;
index 108e8e8a1a367e670eeab174d968cad612fbf8fe..576849e9529642a033d6ab768c1560dbb02aaf6c 100644 (file)
@@ -496,8 +496,7 @@ static int amdgpu_vkms_sw_init(void *handle)
        adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
 
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
        adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
 
index 33a8a7365aef9642a560faad85d267de815565ba..f0e235f98afb299d6647020c5177214a0f94632f 100644 (file)
 #include "navi10_enum.h"
 #include "soc15_common.h"
 
+#define regATHUB_MISC_CNTL_V3_0_1                      0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX             0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+       uint32_t data;
+
+       switch (adev->ip_versions[ATHUB_HWIP][0]) {
+       case IP_VERSION(3, 0, 1):
+               data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+               break;
+       default:
+               data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+               break;
+       }
+       return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+       switch (adev->ip_versions[ATHUB_HWIP][0]) {
+       case IP_VERSION(3, 0, 1):
+               WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+               break;
+       default:
+               WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+               break;
+       }
+}
+
 static void
 athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                            bool enable)
 {
        uint32_t def, data;
 
-       def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+       def = data = athub_v3_0_get_cg_cntl(adev);
 
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
                data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
 
        if (def != data)
-               WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+               athub_v3_0_set_cg_cntl(adev, data);
 }
 
 static void
@@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
 {
        uint32_t def, data;
 
-       def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+       def = data = athub_v3_0_get_cg_cntl(adev);
 
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
                data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
@@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
                data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
 
        if (def != data)
-               WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+               athub_v3_0_set_cg_cntl(adev, data);
 }
 
 int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
@@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
 
        switch (adev->ip_versions[ATHUB_HWIP][0]) {
        case IP_VERSION(3, 0, 0):
+       case IP_VERSION(3, 0, 1):
        case IP_VERSION(3, 0, 2):
                athub_v3_0_update_medium_grain_clock_gating(adev,
                                state == AMD_CG_STATE_GATE);
@@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
        int data;
 
        /* AMD_CG_SUPPORT_ATHUB_MGCG */
-       data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+       data = athub_v3_0_get_cg_cntl(adev);
        if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
                *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
 
index 9c964cd3b5d4e24fec07595e172bc6e8bf1aaa59..288fce7dc0ed178305b443d93fe72906e603bbf6 100644 (file)
@@ -2796,8 +2796,7 @@ static int dce_v10_0_sw_init(void *handle)
        adev_to_drm(adev)->mode_config.max_height = 16384;
 
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
        adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
index e0ad9f27dc3f943dbe02d7bbbd491e241e202cad..cbe5250b31cb4e33ac7460a323690df56f955f70 100644 (file)
@@ -2914,8 +2914,7 @@ static int dce_v11_0_sw_init(void *handle)
        adev_to_drm(adev)->mode_config.max_height = 16384;
 
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
        adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
index 77f5e998a1202bb4666a8bac794d29592917aceb..b1c44fab074f32806266b054d4aa163816cd9c46 100644 (file)
@@ -2673,8 +2673,7 @@ static int dce_v6_0_sw_init(void *handle)
        adev_to_drm(adev)->mode_config.max_width = 16384;
        adev_to_drm(adev)->mode_config.max_height = 16384;
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       adev_to_drm(adev)->mode_config.prefer_shadow = 1;
        adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
        adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
 
index 802e5c753271cd590af370b9bd2e25a585a6b666..a22b45c9279227a2a28adf8aabebf2718c4dd1ba 100644 (file)
@@ -2693,8 +2693,11 @@ static int dce_v8_0_sw_init(void *handle)
        adev_to_drm(adev)->mode_config.max_height = 16384;
 
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       if (adev->asic_type == CHIP_HAWAII)
+               /* disable prefer shadow for now due to hibernation issues */
+               adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       else
+               adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
        adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
 
index fafbad3cf08d8592f13c151f2aeea88aa757e8c5..a2a4dc1844c0adc0c54ab850fa026e1f9ca62aec 100644 (file)
@@ -4846,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle)
        case IP_VERSION(10, 3, 3):
        case IP_VERSION(10, 3, 7):
                adev->gfx.me.num_me = 1;
-               adev->gfx.me.num_pipe_per_me = 2;
+               adev->gfx.me.num_pipe_per_me = 1;
                adev->gfx.me.num_queue_per_pipe = 1;
                adev->gfx.mec.num_mec = 2;
                adev->gfx.mec.num_pipe_per_mec = 4;
index 6fd71cb10e54a0f65f3245595c465b2a4f60a2b0..158d87e6805d114cfe132bac68a2e96b78e16f10 100644 (file)
@@ -53,6 +53,7 @@
 #define GFX11_MEC_HPD_SIZE     2048
 
 #define RLCG_UCODE_LOADING_START_ADDRESS       0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1       0x1388
 
 #define regCGTT_WD_CLK_CTRL            0x5086
 #define regCGTT_WD_CLK_CTRL_BASE_IDX   1
@@ -5279,6 +5280,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
        .update_spm_vmid = gfx_v11_0_update_spm_vmid,
 };
 
+/*
+ * Set or clear GFX_POWER_GATING_ENABLE in RLC_PG_CNTL. The bit is set only
+ * when @enable is true and pg_flags contains AMD_PG_SUPPORT_GFX_PG; in every
+ * other case it is cleared. When the bit ends up set on GC 11.0.1,
+ * RLC_PG_DELAY_3 is also written with RLC_PG_DELAY_3_DEFAULT_GC_11_0_1.
+ */
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+       const bool gate = enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG);
+       u32 pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+       if (gate)
+               pg_cntl |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+       else
+               pg_cntl &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+       WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
+
+       /* The delay register is only touched when gating was switched on. */
+       if (!gate)
+               return;
+
+       /* Program RLC_PG_DELAY3 for CGPG hysteresis */
+       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
+               WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3,
+                            RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+}
+
+/*
+ * Apply the GFX power gating setting via gfx_v11_cntl_power_gating(), with
+ * the register update bracketed by amdgpu_gfx_rlc_enter_safe_mode() and
+ * amdgpu_gfx_rlc_exit_safe_mode().
+ */
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+       amdgpu_gfx_rlc_enter_safe_mode(adev);
+
+       gfx_v11_cntl_power_gating(adev, enable);
+
+       amdgpu_gfx_rlc_exit_safe_mode(adev);
+}
+
 static int gfx_v11_0_set_powergating_state(void *handle,
                                           enum amd_powergating_state state)
 {
@@ -5293,6 +5326,11 @@ static int gfx_v11_0_set_powergating_state(void *handle,
        case IP_VERSION(11, 0, 2):
                amdgpu_gfx_off_ctrl(adev, enable);
                break;
+       case IP_VERSION(11, 0, 1):
+               gfx_v11_cntl_pg(adev, enable);
+               /* TODO: Enable this when GFXOFF is ready */
+               // amdgpu_gfx_off_ctrl(adev, enable);
+               break;
        default:
                break;
        }
@@ -5310,6 +5348,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
 
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(11, 0, 0):
+       case IP_VERSION(11, 0, 1):
        case IP_VERSION(11, 0, 2):
                gfx_v11_0_update_gfx_clock_gating(adev,
                                state ==  AMD_CG_STATE_GATE);
index 9ae8cdaa033ee391cdbfea5761879ac87af71862..f513e2c2e964f0c9b3c8d8d522e96692eb55259b 100644 (file)
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
index 22761a3bb8181e076611ac4eef57eb4182a908c7..4603653916f5a551854a784c75da1817731f4bf9 100644 (file)
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        up_read(&adev->reset_domain->sem);
@@ -1624,12 +1625,15 @@ static int gmc_v9_0_sw_init(void *handle)
                        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
                else
                        amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+                       adev->gmc.translate_further = adev->vm_manager.num_level > 1;
                break;
        case IP_VERSION(9, 4, 1):
                adev->num_vmhubs = 3;
 
                /* Keep the vm size same with Vega20 */
                amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+               adev->gmc.translate_further = adev->vm_manager.num_level > 1;
                break;
        default:
                break;
index 39a696cd45b5e37b6970309714e1b5e6142d9ad7..29c3484ae1f1660a43c4e668f85ba23612db5039 100644 (file)
@@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
                        0);
 }
 
+/*
+ * Reprogram HDP_MEM_POWER_CTRL for the ATOMIC and RC memories.
+ *
+ * Does nothing unless cg_flags contains at least one of
+ * AMD_CG_SUPPORT_HDP_LS, AMD_CG_SUPPORT_HDP_DS or AMD_CG_SUPPORT_HDP_SD.
+ * All *_EN fields are cleared first; when @enable is true exactly one mode
+ * is then switched on, picked in the order SD, LS, DS. The ATOMIC/RC MEM
+ * clock soft overrides in HDP_CLK_CNTL are set while the mode is changed and
+ * cleared again before returning.
+ */
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+                                            bool enable)
+{
+       uint32_t hdp_clk_cntl;
+       uint32_t hdp_mem_pwr_cntl;
+
+       if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+                               AMD_CG_SUPPORT_HDP_DS |
+                               AMD_CG_SUPPORT_HDP_SD)))
+               return;
+
+       hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+       hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+       /* Force the MEM clocks on before switching the clock/power mode */
+       hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+                                    ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+       hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+                                    RC_MEM_CLK_SOFT_OVERRIDE, 1);
+       WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+       /* Disable clock and power gating before making any change */
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        ATOMIC_MEM_POWER_CTRL_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        ATOMIC_MEM_POWER_LS_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        ATOMIC_MEM_POWER_DS_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        ATOMIC_MEM_POWER_SD_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        RC_MEM_POWER_CTRL_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        RC_MEM_POWER_LS_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        RC_MEM_POWER_DS_EN, 0);
+       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                        RC_MEM_POWER_SD_EN, 0);
+       WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+       /* Already disabled above. The actions below are for "enabled" only */
+       if (enable) {
+               /*
+                * Only one clock gating mode (LS/DS/SD) can be enabled;
+                * SD is preferred over LS, and LS over DS.
+                */
+               if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        ATOMIC_MEM_POWER_SD_EN, 1);
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        RC_MEM_POWER_SD_EN, 1);
+               } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        ATOMIC_MEM_POWER_LS_EN, 1);
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        RC_MEM_POWER_LS_EN, 1);
+               } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        ATOMIC_MEM_POWER_DS_EN, 1);
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+                                                        HDP_MEM_POWER_CTRL,
+                                                        RC_MEM_POWER_DS_EN, 1);
+               }
+
+               /*
+                * Confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set
+                * for SRAM LS/DS/SD. This test repeats the one behind the
+                * early return at the top, so it always holds here.
+                */
+               if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+                                     AMD_CG_SUPPORT_HDP_SD)) {
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                                        ATOMIC_MEM_POWER_CTRL_EN, 1);
+                       hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+                                                        RC_MEM_POWER_CTRL_EN, 1);
+                       WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+               }
+       }
+
+       /* Drop the MEM clock override once the clock/power mode is changed */
+       hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+                                    ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+       hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+                                    RC_MEM_CLK_SOFT_OVERRIDE, 0);
+       WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+/*
+ * Enable or disable HDP medium grain clock gating by clearing (enable) or
+ * setting (disable) every clock soft-override bit in HDP_CLK_CNTL. Does
+ * nothing when cg_flags lacks AMD_CG_SUPPORT_HDP_MGCG.
+ */
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+                                                     bool enable)
+{
+       const uint32_t soft_overrides =
+               HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+       uint32_t clk_cntl;
+
+       if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+               return;
+
+       clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+       /* Gating is active only while none of the overrides is asserted. */
+       clk_cntl = enable ? (clk_cntl & ~soft_overrides)
+                         : (clk_cntl | soft_overrides);
+
+       WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, clk_cntl);
+}
+
+/*
+ * OR the currently active HDP clock gating features into @flags.
+ * MGCG is reported when no soft-override bit is set in HDP_CLK_CNTL; at most
+ * one of LS/DS/SD is reported, taken from the ATOMIC_MEM_POWER_*_EN bits of
+ * HDP_MEM_POWER_CTRL and checked in that order.
+ */
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+                                          u64 *flags)
+{
+       const uint32_t soft_overrides =
+               HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+               HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+       uint32_t reg;
+
+       /* AMD_CG_SUPPORT_HDP_MGCG */
+       reg = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+       if ((reg & soft_overrides) == 0)
+               *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+       /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+       reg = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+       if (reg & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) {
+               *flags |= AMD_CG_SUPPORT_HDP_LS;
+       } else if (reg & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) {
+               *flags |= AMD_CG_SUPPORT_HDP_DS;
+       } else if (reg & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) {
+               *flags |= AMD_CG_SUPPORT_HDP_SD;
+       }
+}
+
+/*
+ * Apply @enable to HDP memory power gating first and to medium grain clock
+ * gating second.
+ */
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+                                             bool enable)
+{
+       hdp_v5_2_update_mem_power_gating(adev, enable);
+       hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
+/* HDP v5.2 callbacks: HDP flush plus clock gating control and query. */
 const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
        .flush_hdp = hdp_v5_2_flush_hdp,
+       .update_clock_gating = hdp_v5_2_update_clock_gating,
+       .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
 };
index 92dc60a9d2094df3d7f3be42fbd8aa410ddaf340..085e613f3646d945e9d1dd668d037f1cc08dad2e 100644 (file)
@@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
+/* IH v6.0 ring callbacks; IV and IV-timestamp decoding use the common helpers. */
 static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
        .get_wptr = ih_v6_0_get_wptr,
        .decode_iv = amdgpu_ih_decode_iv_helper,
+       .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
        .set_rptr = ih_v6_0_set_rptr
 };
 
index cac72ced94c852e155ac11a280aeb20a7109774e..e8058edc1d1083969381374daf0ebe11751c15c4 100644 (file)
@@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
 static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                          bool enable)
 {
-       //TODO
+       uint32_t def, data;
+
+       def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+       if (enable)
+               data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+       else
+               data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+       if (def != data)
+               WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
 }
 
 static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
                                                         bool enable)
 {
-       //TODO
+       uint32_t def, data;
+
+       def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+       if (enable)
+               data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+       else
+               data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+       if (def != data)
+               WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
 }
 
 static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
                                        enum amd_clockgating_state state)
 {
+       if (amdgpu_sriov_vf(adev))
+               return 0;
+
        mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
                        state == AMD_CG_STATE_GATE);
        mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
@@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
 
 static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
 {
-       //TODO
+       int data;
+
+       if (amdgpu_sriov_vf(adev))
+               *flags = 0;
+
+       data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+       /* AMD_CG_SUPPORT_MC_MGCG */
+       if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+               *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+       /* AMD_CG_SUPPORT_MC_LS */
+       if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+               *flags |= AMD_CG_SUPPORT_MC_LS;
 }
 
 const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
index 4b5396d3e60f668985b2487d4d946af7b1555af5..eec13cb5bf75828e45c88c7715b0afb157d7605d 100644 (file)
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
        u32 wptr, tmp;
        struct amdgpu_ih_regs *ih_regs;
 
-       if (ih == &adev->irq.ih) {
+       if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
                /* Only ring0 supports writeback. On other rings fall back
                 * to register-based code with overflow checking below.
+                * ih_soft ring doesn't have any backing hardware registers,
+                * update wptr and return.
                 */
                wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
 {
        struct amdgpu_ih_regs *ih_regs;
 
+       if (ih == &adev->irq.ih_soft)
+               return;
+
        if (ih->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                *ih->rptr_cpu = ih->rptr;
index a2588200ea580919786074c936b643900aa95c60..0b2ac418e4ac4f79dcc1f2fe109851104603b4fe 100644 (file)
@@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
                adev->psp.dtm_context.context.bin_desc.start_addr =
                        (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
                        le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+               if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+                       adev->psp.securedisplay_context.context.bin_desc.fw_version =
+                               le32_to_cpu(ta_hdr->securedisplay.fw_version);
+                       adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+                               le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+                       adev->psp.securedisplay_context.context.bin_desc.start_addr =
+                               (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+                               le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+               }
        }
 
        return 0;
index 726a5bba40b2023694bafbc5826e4729d6ee6cfa..a75a286e1ecf37e0927ec05cdfcf7a15f7a3d582 100644 (file)
@@ -20,7 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <linux/dev_printk.h>
 #include <drm/drm_drv.h>
 #include <linux/vmalloc.h>
 #include "amdgpu.h"
index 52816de5e17bf77af187c259d592476fa7e98cd6..1ff7fc7bb3400a490e275d3962737f558ff8d9fc 100644 (file)
@@ -546,8 +546,10 @@ static int soc21_common_early_init(void *handle)
        case IP_VERSION(11, 0, 0):
                adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
                        AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
                        AMD_CG_SUPPORT_GFX_3D_CGCG |
                        AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
                        AMD_CG_SUPPORT_GFX_MGCG |
                        AMD_CG_SUPPORT_REPEATER_FGCG |
                        AMD_CG_SUPPORT_GFX_FGCG |
@@ -575,7 +577,9 @@ static int soc21_common_early_init(void *handle)
                        AMD_CG_SUPPORT_VCN_MGCG |
                        AMD_CG_SUPPORT_JPEG_MGCG |
                        AMD_CG_SUPPORT_ATHUB_MGCG |
-                       AMD_CG_SUPPORT_ATHUB_LS;
+                       AMD_CG_SUPPORT_ATHUB_LS |
+                       AMD_CG_SUPPORT_IH_CG |
+                       AMD_CG_SUPPORT_HDP_SD;
                adev->pg_flags =
                        AMD_PG_SUPPORT_VCN |
                        AMD_PG_SUPPORT_VCN_DPG |
@@ -586,9 +590,23 @@ static int soc21_common_early_init(void *handle)
                break;
        case IP_VERSION(11, 0, 1):
                adev->cg_flags =
+                       AMD_CG_SUPPORT_GFX_CGCG |
+                       AMD_CG_SUPPORT_GFX_CGLS |
+                       AMD_CG_SUPPORT_GFX_MGCG |
+                       AMD_CG_SUPPORT_GFX_FGCG |
+                       AMD_CG_SUPPORT_REPEATER_FGCG |
+                       AMD_CG_SUPPORT_GFX_PERF_CLK |
+                       AMD_CG_SUPPORT_MC_MGCG |
+                       AMD_CG_SUPPORT_MC_LS |
+                       AMD_CG_SUPPORT_HDP_MGCG |
+                       AMD_CG_SUPPORT_HDP_LS |
+                       AMD_CG_SUPPORT_ATHUB_MGCG |
+                       AMD_CG_SUPPORT_ATHUB_LS |
+                       AMD_CG_SUPPORT_IH_CG |
                        AMD_CG_SUPPORT_VCN_MGCG |
                        AMD_CG_SUPPORT_JPEG_MGCG;
                adev->pg_flags =
+                       AMD_PG_SUPPORT_GFX_PG |
                        AMD_PG_SUPPORT_JPEG;
                adev->external_rev_id = adev->rev_id + 0x1;
                break;
@@ -683,6 +701,7 @@ static int soc21_common_set_clockgating_state(void *handle,
 
        switch (adev->ip_versions[NBIO_HWIP][0]) {
        case IP_VERSION(4, 3, 0):
+       case IP_VERSION(4, 3, 1):
                adev->nbio.funcs->update_medium_grain_clock_gating(adev,
                                state == AMD_CG_STATE_GATE);
                adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -690,6 +709,10 @@ static int soc21_common_set_clockgating_state(void *handle,
                adev->hdp.funcs->update_clock_gating(adev,
                                state == AMD_CG_STATE_GATE);
                break;
+       case IP_VERSION(7, 7, 0):
+               adev->hdp.funcs->update_clock_gating(adev,
+                               state == AMD_CG_STATE_GATE);
+               break;
        default:
                break;
        }
index ca14c3ef742ecd27881baa1912bd772ce038fc49..fb2d74f3044814522958adda6e6435cad0ea9617 100644 (file)
@@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
  *
  * Stop VCN block with dpg mode
  */
-static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 {
        uint32_t tmp;
 
@@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
        /* disable dynamic power gating mode */
        WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
                ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
-       return 0;
 }
 
 /**
@@ -1154,7 +1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
                fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
 
                if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
-                       r = vcn_v4_0_stop_dpg_mode(adev, i);
+                       vcn_v4_0_stop_dpg_mode(adev, i);
                        continue;
                }
 
index cdd599a081258c304d880f1fc6d9ffd7294d8bc5..03b7066471f9ad251d4337350ced36882a10f582 100644 (file)
@@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
        u32 wptr, tmp;
        struct amdgpu_ih_regs *ih_regs;
 
-       if (ih == &adev->irq.ih) {
+       if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
                /* Only ring0 supports writeback. On other rings fall back
                 * to register-based code with overflow checking below.
+                * ih_soft ring doesn't have any backing hardware registers,
+                * update wptr and return.
                 */
                wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
 {
        struct amdgpu_ih_regs *ih_regs;
 
+       if (ih == &adev->irq.ih_soft)
+               return;
+
        if (ih->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                *ih->rptr_cpu = ih->rptr;
index 3b4eb8285943c1c4091d54c06eea8fb6d2966d5c..2022ffbb8dba55e6522e56e689d582c87dc6e543 100644 (file)
@@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
        u32 wptr, tmp;
        struct amdgpu_ih_regs *ih_regs;
 
-       if (ih == &adev->irq.ih) {
+       if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
                /* Only ring0 supports writeback. On other rings fall back
                 * to register-based code with overflow checking below.
+                * ih_soft ring doesn't have any backing hardware registers,
+                * update wptr and return.
                 */
                wptr = le32_to_cpu(*ih->wptr_cpu);
 
@@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
 {
        struct amdgpu_ih_regs *ih_regs;
 
+       if (ih == &adev->irq.ih_soft)
+               return;
+
        if (ih->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
                *ih->rptr_cpu = ih->rptr;
index 2b3d8bc8f0aaeb2e243c43f75f40675fb3f039bb..dc774ddf34456461a0818c4cb0955efadfdc566c 100644 (file)
@@ -874,7 +874,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
        err = kfd_wait_on_events(p, args->num_events,
                        (void __user *)args->events_ptr,
                        (args->wait_for_all != 0),
-                       args->timeout, &args->wait_result);
+                       &args->timeout, &args->wait_result);
 
        return err;
 }
index f5853835f03a23c0f6857c3c037a41218a0ec9d8..357298e69495f849af6f45a050a018898d300261 100644 (file)
@@ -102,13 +102,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
 
        switch (sdma_version) {
        case IP_VERSION(6, 0, 0):
-       case IP_VERSION(6, 0, 1):
        case IP_VERSION(6, 0, 2):
                /* Reserve 1 for paging and 1 for gfx */
                kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
                /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
                kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
                break;
+       case IP_VERSION(6, 0, 1):
+               /* Reserve 1 for paging and 1 for gfx */
+               kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+               /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */
+               kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
+               break;
        default:
                break;
        }
index 3942a56c28bbbcce5a2f90e86b522ac7db4ce1b2..83e3ce9f604911b554f5e1a600e1dee49db02b3a 100644 (file)
@@ -894,7 +894,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
        return msecs_to_jiffies(user_timeout_ms) + 1;
 }
 
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+                        bool undo_auto_reset)
 {
        uint32_t i;
 
@@ -903,6 +904,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
                        spin_lock(&waiters[i].event->lock);
                        remove_wait_queue(&waiters[i].event->wq,
                                          &waiters[i].wait);
+                       if (undo_auto_reset && waiters[i].activated &&
+                           waiters[i].event && waiters[i].event->auto_reset)
+                               set_event(waiters[i].event);
                        spin_unlock(&waiters[i].event->lock);
                }
 
@@ -911,7 +915,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
 
 int kfd_wait_on_events(struct kfd_process *p,
                       uint32_t num_events, void __user *data,
-                      bool all, uint32_t user_timeout_ms,
+                      bool all, uint32_t *user_timeout_ms,
                       uint32_t *wait_result)
 {
        struct kfd_event_data __user *events =
@@ -920,7 +924,7 @@ int kfd_wait_on_events(struct kfd_process *p,
        int ret = 0;
 
        struct kfd_event_waiter *event_waiters = NULL;
-       long timeout = user_timeout_to_jiffies(user_timeout_ms);
+       long timeout = user_timeout_to_jiffies(*user_timeout_ms);
 
        event_waiters = alloc_event_waiters(num_events);
        if (!event_waiters) {
@@ -970,15 +974,11 @@ int kfd_wait_on_events(struct kfd_process *p,
                }
 
                if (signal_pending(current)) {
-                       /*
-                        * This is wrong when a nonzero, non-infinite timeout
-                        * is specified. We need to use
-                        * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
-                        * contains a union with data for each user and it's
-                        * in generic kernel code that I don't want to
-                        * touch yet.
-                        */
                        ret = -ERESTARTSYS;
+                       if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+                           *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+                               *user_timeout_ms = jiffies_to_msecs(
+                                       max(0l, timeout-1));
                        break;
                }
 
@@ -1019,7 +1019,7 @@ int kfd_wait_on_events(struct kfd_process *p,
                                               event_waiters, events);
 
 out_unlock:
-       free_waiters(num_events, event_waiters);
+       free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
        mutex_unlock(&p->event_mutex);
 out:
        if (ret)
index d03a3b9c9c5d66cb532f4576e40c9ad7d1d5088b..bf610e3b683bbaf23212de15103c5c68e744b09b 100644 (file)
@@ -1317,7 +1317,7 @@ void kfd_event_free_process(struct kfd_process *p);
 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
 int kfd_wait_on_events(struct kfd_process *p,
                       uint32_t num_events, void __user *data,
-                      bool all, uint32_t user_timeout_ms,
+                      bool all, uint32_t *user_timeout_ms,
                       uint32_t *wait_result);
 void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                                uint32_t valid_id_bits);
index a67ba8879a56730226cc0ebbdd21a35cce8d68ba..11074cc8c333b274484929dddb0752725e4af24b 100644 (file)
@@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
                kfree(svm_bo);
                return -ESRCH;
        }
-       svm_bo->svms = prange->svms;
        svm_bo->eviction_fence =
                amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
                                           mm,
@@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
        struct svm_range_bo *svm_bo;
-       struct kfd_process *p;
        struct mm_struct *mm;
        int r = 0;
 
@@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
        if (!svm_bo_ref_unless_zero(svm_bo))
                return; /* svm_bo was freed while eviction was pending */
 
-       /* svm_range_bo_release destroys this worker thread. So during
-        * the lifetime of this thread, kfd_process and mm will be valid.
-        */
-       p = container_of(svm_bo->svms, struct kfd_process, svms);
-       mm = p->mm;
-       if (!mm)
+       if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+               mm = svm_bo->eviction_fence->mm;
+       } else {
+               svm_range_bo_unref(svm_bo);
                return;
+       }
 
        mmap_read_lock(mm);
        spin_lock(&svm_bo->list_lock);
@@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
                mutex_lock(&prange->migrate_mutex);
                do {
-                       r = svm_migrate_vram_to_ram(prange,
-                                               svm_bo->eviction_fence->mm,
+                       r = svm_migrate_vram_to_ram(prange, mm,
                                                KFD_MIGRATE_TRIGGER_TTM_EVICTION);
                } while (!r && prange->actual_loc && --retries);
 
@@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
        }
        spin_unlock(&svm_bo->list_lock);
        mmap_read_unlock(mm);
+       mmput(mm);
 
        dma_fence_signal(&svm_bo->eviction_fence->base);
 
index 9156b041ef17519db0094cbc05d8868fda0885e1..cfac13ad06ef0f70e2983bceeb85b59c0427e2fb 100644 (file)
@@ -46,7 +46,6 @@ struct svm_range_bo {
        spinlock_t                      list_lock;
        struct amdgpu_amdkfd_fence      *eviction_fence;
        struct work_struct              eviction_work;
-       struct svm_range_list           *svms;
        uint32_t                        evicting;
        struct work_struct              release_work;
 };
index 25990bec600d08fde12f59d96b16a6cdfb394069..3f0a4a415907d425b113f251684822f90ed4c495 100644 (file)
@@ -1392,8 +1392,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
 
 static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
 {
+       struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
        struct kfd_iolink_properties *props = NULL, *props2 = NULL;
-       struct kfd_iolink_properties *gpu_link, *cpu_link;
        struct kfd_topology_device *cpu_dev;
        int ret = 0;
        int i, num_cpu;
@@ -1416,16 +1416,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
                        continue;
 
                /* find CPU <-->  CPU links */
+               cpu_link = NULL;
                cpu_dev = kfd_topology_device_by_proximity_domain(i);
                if (cpu_dev) {
-                       list_for_each_entry(cpu_link,
+                       list_for_each_entry(tmp_link,
                                        &cpu_dev->io_link_props, list) {
-                               if (cpu_link->node_to == gpu_link->node_to)
+                               if (tmp_link->node_to == gpu_link->node_to) {
+                                       cpu_link = tmp_link;
                                        break;
+                               }
                        }
                }
 
-               if (cpu_link->node_to != gpu_link->node_to)
+               if (!cpu_link)
                        return -ENOMEM;
 
                /* CPU <--> CPU <--> GPU, GPU node*/
index 8660d93cc40551add54b57a549421640940b5036..5140d9c2bf3b40b689134fa9017cf9953aedecd6 100644 (file)
@@ -3825,8 +3825,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
        adev_to_drm(adev)->mode_config.max_height = 16384;
 
        adev_to_drm(adev)->mode_config.preferred_depth = 24;
-       /* disable prefer shadow for now due to hibernation issues */
-       adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       if (adev->asic_type == CHIP_HAWAII)
+               /* disable prefer shadow for now due to hibernation issues */
+               adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+       else
+               adev_to_drm(adev)->mode_config.prefer_shadow = 1;
        /* indicates support for immediate flip */
        adev_to_drm(adev)->mode_config.async_page_flip = true;
 
@@ -4135,6 +4138,7 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
        }
 }
 
+static void amdgpu_set_panel_orientation(struct drm_connector *connector);
 
 /*
  * In this architecture, the association
@@ -4326,6 +4330,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                                        adev_to_drm(adev)->vblank_disable_immediate = false;
                        }
                }
+               amdgpu_set_panel_orientation(&aconnector->base);
        }
 
        /* Software is initialized. Now we can register interrupt handlers. */
@@ -6684,6 +6689,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
            connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
                return;
 
+       mutex_lock(&connector->dev->mode_config.mutex);
+       amdgpu_dm_connector_get_modes(connector);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+
        encoder = amdgpu_dm_connector_to_encoder(connector);
        if (!encoder)
                return;
@@ -6728,8 +6737,6 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
                 * restored here.
                 */
                amdgpu_dm_update_freesync_caps(connector, edid);
-
-               amdgpu_set_panel_orientation(connector);
        } else {
                amdgpu_dm_connector->num_modes = 0;
        }
index b841b8b0a9d82074a3699f523ba8013a1277631c..fca7cf9dbaeec50a42f5591409f325bc7f0eeade 100644 (file)
@@ -660,7 +660,7 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty
                        add_gfx10_1_modifiers(adev, mods, &size, &capacity);
                break;
        case AMDGPU_FAMILY_GC_11_0_0:
-       case AMDGPU_FAMILY_GC_11_0_2:
+       case AMDGPU_FAMILY_GC_11_0_1:
                add_gfx11_modifiers(adev, mods, &size, &capacity);
                break;
        }
@@ -1412,7 +1412,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
                }
                break;
        case AMDGPU_FAMILY_GC_11_0_0:
-       case AMDGPU_FAMILY_GC_11_0_2:
+       case AMDGPU_FAMILY_GC_11_0_1:
                switch (AMD_FMT_MOD_GET(TILE, modifier)) {
                case AMD_FMT_MOD_TILE_GFX11_256K_R_X:
                case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
index 6767fab55c260d4869095c3baaf9517c5b831dd3..352e9afb85c6d67354eb47204b36c8dbc4e9da83 100644 (file)
@@ -100,3 +100,36 @@ void convert_float_matrix(
                matrix[i] = (uint16_t)reg_value;
        }
 }
+
+/*
+ * Greatest common divisor of a and b, computed with Euclid's algorithm.
+ * Returns the non-zero operand when the other one is 0, and 0 when both are 0.
+ */
+static uint32_t find_gcd(uint32_t a, uint32_t b)
+{
+       uint32_t remainder = 0;
+       while (b != 0) {
+               remainder = a % b;
+               a = b;
+               b = remainder;
+       }
+       return a;
+}
+
+/*
+ * Reduce the fraction num/den to lowest terms and store the result in
+ * *out_num and *out_den. For 0/0 there is no common factor to cancel, so
+ * both outputs are written as 0 instead of dividing by a zero gcd.
+ */
+void reduce_fraction(uint32_t num, uint32_t den,
+               uint32_t *out_num, uint32_t *out_den)
+{
+       uint32_t gcd = find_gcd(num, den);
+
+       /* find_gcd() returns 0 only when num == den == 0; never divide by it */
+       if (gcd == 0)
+               gcd = 1;
+
+       *out_num = num / gcd;
+       *out_den = den / gcd;
+}
index ade785c4fdc7dc1fbd347d9dbb22bb9bb8336464..81da4e6f7a1acb074c02d719b068661b1c06db81 100644 (file)
@@ -38,6 +38,9 @@ void convert_float_matrix(
        struct fixed31_32 *flt,
        uint32_t buffer_size);
 
+void reduce_fraction(uint32_t num, uint32_t den,
+               uint32_t *out_num, uint32_t *out_den);
+
 static inline unsigned int log_2(unsigned int num)
 {
        return ilog2(num);
index 4c76091fd1f21af087cfb75532678d51ca7ce719..f276abb63bcd7ce4ea2a0e3d7e6beea1581b9d98 100644 (file)
@@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
            break;
        }
 
-       case AMDGPU_FAMILY_GC_11_0_2: {
+       case AMDGPU_FAMILY_GC_11_0_1: {
                struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
 
                if (clk_mgr == NULL) {
@@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
                dcn32_clk_mgr_destroy(clk_mgr);
                break;
 
-       case AMDGPU_FAMILY_GC_11_0_2:
+       case AMDGPU_FAMILY_GC_11_0_1:
                dcn314_clk_mgr_destroy(clk_mgr);
                break;
 
index 0202dc682682b1cd7975b27e64dd550a6dd44a6e..ca6dfd2d7561fab9378ddfa7a0fd3781e7872df9 100644 (file)
  */
 
 #include "dccg.h"
-#include "clk_mgr_internal.h"
+#include "rn_clk_mgr.h"
 
 #include "dcn20/dcn20_clk_mgr.h"
-#include "rn_clk_mgr.h"
 #include "dml/dcn20/dcn20_fpu.h"
 
 #include "dce100/dce_clk_mgr.h"
index 2e088c5171b28b89f51e49d1660c42752566fea8..f1319957e400af37a0450c17821519006f5899a7 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "clk_mgr.h"
 #include "dm_pp_smu.h"
+#include "clk_mgr_internal.h"
 
 extern struct wm_table ddr4_wm_table_gs;
 extern struct wm_table lpddr4_wm_table_gs;
index ee99974b3b62bb3aea7a43b10e6009e0717de67d..beb025cd3dc29671a5917a5d0f01ef2c46410d63 100644 (file)
@@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
        dcn314_smu_enable_pme_wa(clk_mgr);
 }
 
-void dcn314_init_clocks(struct clk_mgr *clk_mgr)
-{
-       memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
-       // Assumption is that boot state always supports pstate
-       clk_mgr->clks.p_state_change_support = true;
-       clk_mgr->clks.prev_p_state_change_support = true;
-       clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
-       clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
-}
-
 bool dcn314_are_clock_states_equal(struct dc_clocks *a,
                struct dc_clocks *b)
 {
@@ -425,7 +415,7 @@ static struct wm_table lpddr5_wm_table = {
        }
 };
 
-static DpmClocks_t dummy_clocks;
+static DpmClocks314_t dummy_clocks;
 
 static struct dcn314_watermarks dummy_wms = { 0 };
 
@@ -510,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
 static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
                struct dcn314_smu_dpm_clks *smu_dpm_clks)
 {
-       DpmClocks_t *table = smu_dpm_clks->dpm_clks;
+       DpmClocks314_t *table = smu_dpm_clks->dpm_clks;
 
        if (!clk_mgr->smu_ver)
                return;
@@ -527,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
        dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
 }
 
+static inline bool is_valid_clock_value(uint32_t clock_value) /* range screen applied to DfPstateTable[].FClk entries */
+{
+       return clock_value > 1 && clock_value < 100000; /* exclusive bounds: only 2..99999 pass; 0 and 1 are rejected */
+}
+
+static unsigned int convert_wck_ratio(uint8_t wck_ratio) /* WCK_RATIO_* code -> integer stored in clk_table wck_ratio */
+{
+       switch (wck_ratio) {
+       case WCK_RATIO_1_2:
+               return 2;
+
+       case WCK_RATIO_1_4:
+               return 4;
+
+       default:
+               break; /* every other code leaves the switch and yields 1 */
+       }
+       return 1;
+}
+
 static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
 {
        uint32_t max = 0;
@@ -540,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
        return max;
 }
 
-static unsigned int find_clk_for_voltage(
-               const DpmClocks_t *clock_table,
-               const uint32_t clocks[],
-               unsigned int voltage)
-{
-       int i;
-       int max_voltage = 0;
-       int clock = 0;
-
-       for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
-               if (clock_table->SocVoltage[i] == voltage) {
-                       return clocks[i];
-               } else if (clock_table->SocVoltage[i] >= max_voltage &&
-                               clock_table->SocVoltage[i] < voltage) {
-                       max_voltage = clock_table->SocVoltage[i];
-                       clock = clocks[i];
-               }
-       }
-
-       ASSERT(clock);
-       return clock;
-}
-
 static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
                                                    struct integrated_info *bios_info,
-                                                   const DpmClocks_t *clock_table)
+                                                   const DpmClocks314_t *clock_table)
 {
-       int i, j;
        struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
-       uint32_t max_dispclk = 0, max_dppclk = 0;
-
-       j = -1;
-
-       ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
-
-       /* Find lowest DPM, FCLK is filled in reverse order*/
+       struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+       uint32_t max_pstate = 0,  max_fclk = 0,  min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+       int i;
 
-       for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
-               if (clock_table->DfPstateTable[i].FClk != 0) {
-                       j = i;
-                       break;
+       /* Find highest valid fclk pstate */
+       for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
+               if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) &&
+                   clock_table->DfPstateTable[i].FClk > max_fclk) {
+                       max_fclk = clock_table->DfPstateTable[i].FClk;
+                       max_pstate = i;
                }
        }
 
-       if (j == -1) {
-               /* clock table is all 0s, just use our own hardcode */
-               ASSERT(0);
-               return;
-       }
-
-       bw_params->clk_table.num_entries = j + 1;
+       /* We expect the table to contain at least one valid fclk entry. */
+       ASSERT(is_valid_clock_value(max_fclk));
 
-       /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+       /* Dispclk and dppclk can be max at any voltage, same number of levels for both */
        if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
            clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
                max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
                max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
        } else {
+               /* Invalid number of entries in the table from PMFW. */
                ASSERT(0);
        }
 
-       for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
-               bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
-               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
-               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
-               switch (clock_table->DfPstateTable[j].WckRatio) {
-               case WCK_RATIO_1_2:
-                       bw_params->clk_table.entries[i].wck_ratio = 2;
-                       break;
-               case WCK_RATIO_1_4:
-                       bw_params->clk_table.entries[i].wck_ratio = 4;
-                       break;
-               default:
-                       bw_params->clk_table.entries[i].wck_ratio = 1;
+       /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+       for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
+               uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
+               int j;
+
+               for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
+                       if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) &&
+                           clock_table->DfPstateTable[j].FClk < min_fclk &&
+                           clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) {
+                               min_fclk = clock_table->DfPstateTable[j].FClk;
+                               min_pstate = j;
+                       }
                }
-               bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
-               bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
+
+               /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+               for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+                       if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+                               break;
+
+               bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+               bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+               bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+               /* Now update clocks we do read */
+               bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
+               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
+               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+               bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+               bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
                bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
                bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+               bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+                       clock_table->DfPstateTable[min_pstate].WckRatio);
+       };
+
+       /* Make sure to include at least one entry at highest pstate */
+       if (max_pstate != min_pstate || i == 0) {
+               if (i > MAX_NUM_DPM_LVL - 1)
+                       i = MAX_NUM_DPM_LVL - 1;
+
+               bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+               bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+               bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
+               bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+               bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+               bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+               bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+               bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+                       clock_table->DfPstateTable[max_pstate].WckRatio);
+               i++;
        }
+       bw_params->clk_table.num_entries = i--;
+
+       /* Make sure all highest clocks are included*/
+       bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+       bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+       bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+       ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+       bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+       bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+       bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
 
+       /*
+        * Set any 0 clocks to max default setting. Not an issue for
+        * power since we aren't doing switching in such case anyway
+        */
+       for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+               if (!bw_params->clk_table.entries[i].fclk_mhz) {
+                       bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+                       bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+                       bw_params->clk_table.entries[i].voltage = def_max.voltage;
+               }
+               if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+                       bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+               if (!bw_params->clk_table.entries[i].socclk_mhz)
+                       bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+               if (!bw_params->clk_table.entries[i].dispclk_mhz)
+                       bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+               if (!bw_params->clk_table.entries[i].dppclk_mhz)
+                       bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+               if (!bw_params->clk_table.entries[i].phyclk_mhz)
+                       bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+               if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+                       bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+               if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+                       bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+       }
+       ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
        bw_params->vram_type = bios_info->memory_type;
-       bw_params->num_channels = bios_info->ma_channel_number;
+       bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
 
        for (i = 0; i < WM_SET_COUNT; i++) {
                bw_params->wm_table.entries[i].wm_inst = i;
@@ -641,7 +689,7 @@ static struct clk_mgr_funcs dcn314_funcs = {
        .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
        .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
        .update_clocks = dcn314_update_clocks,
-       .init_clocks = dcn314_init_clocks,
+       .init_clocks = dcn31_init_clocks,
        .enable_pme_wa = dcn314_enable_pme_wa,
        .are_clock_states_equal = dcn314_are_clock_states_equal,
        .notify_wm_ranges = dcn314_notify_wm_ranges
@@ -681,10 +729,10 @@ void dcn314_clk_mgr_construct(
        }
        ASSERT(clk_mgr->smu_wm_set.wm_set);
 
-       smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem(
+       smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem(
                                clk_mgr->base.base.ctx,
                                DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
-                               sizeof(DpmClocks_t),
+                               sizeof(DpmClocks314_t),
                                &smu_dpm_clks.mc_address.quad_part);
 
        if (smu_dpm_clks.dpm_clks == NULL) {
@@ -729,7 +777,7 @@ void dcn314_clk_mgr_construct(
        if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
                dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
 
-               if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+               if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
                        dcn314_clk_mgr_helper_populate_bw_params(
                                        &clk_mgr->base,
                                        ctx->dc_bios->integrated_info,
index c695a4498c50fc51b6e7f52d755b6a002c5509c7..171f84340eb2fb1d532776ac348cc1fbfad858f5 100644 (file)
@@ -42,7 +42,7 @@ struct clk_mgr_dcn314 {
 
 bool dcn314_are_clock_states_equal(struct dc_clocks *a,
                struct dc_clocks *b);
-void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
                        struct dc_state *context,
                        bool safe_to_lower);
index a7958dc965810bb96c09830937aad95171aca2b8..047d19ea919c78ff84386c43fbfeeb77dfb82c28 100644 (file)
@@ -36,6 +36,37 @@ typedef enum {
        WCK_RATIO_MAX
 } WCK_RATIO_e;
 
+typedef struct { //one element of DpmClocks314_t.DfPstateTable[]
+  uint32_t FClk;
+  uint32_t MemClk;
+  uint32_t Voltage;
+  uint8_t  WckRatio;  //compared against WCK_RATIO_* when the table is parsed
+  uint8_t  Spare[3];  //fills out the 32-bit word started by WckRatio
+} DfPstateTable314_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct { //layout of the DPM clock table buffer allocated in dcn314_clk_mgr_construct()
+  uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+  uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+  uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+  uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+  uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+  uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+  uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+  DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+
+  uint8_t  NumDcfClkLevelsEnabled;  //loop bound over DcfClocks[] when the bw_params table is built
+  uint8_t  NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+  uint8_t  NumSocClkLevelsEnabled;
+  uint8_t  VcnClkLevelsEnabled;     //Applies to both Vclk and Dclk
+  uint8_t  NumDfPstatesEnabled;     //loop bound over DfPstateTable[] when the bw_params table is built
+  uint8_t  spare[3];                //pads the five count bytes above to 8 bytes
+
+  uint32_t MinGfxClk;
+  uint32_t MaxGfxClk;
+} DpmClocks314_t;
+
 struct dcn314_watermarks {
        // Watermarks
        WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
@@ -43,7 +74,7 @@ struct dcn314_watermarks {
 };
 
 struct dcn314_smu_dpm_clks {
-       DpmClocks_t *dpm_clks;
+       DpmClocks314_t *dpm_clks;
        union large_integer mc_address;
 };
 
index e42f44fc1c08d50bffe551703c1a75df2d62d958..aeecca68dea73b25b64c7de1d920444381a7e1ec 100644 (file)
@@ -1074,8 +1074,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
                struct dc_stream_state *old_stream =
                                dc->current_state->res_ctx.pipe_ctx[i].stream;
                bool should_disable = true;
-               bool pipe_split_change =
-                       context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
+               bool pipe_split_change = false;
+
+               if ((context->res_ctx.pipe_ctx[i].top_pipe) &&
+                       (dc->current_state->res_ctx.pipe_ctx[i].top_pipe))
+                       pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx !=
+                               dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx;
+               else
+                       pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe !=
+                               dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
 
                for (j = 0; j < context->stream_count; j++) {
                        if (old_stream == context->streams[j]) {
@@ -3229,7 +3236,7 @@ static void commit_planes_for_stream(struct dc *dc,
                                odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU;
        }
 
-       if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+       if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
                if (top_pipe_to_program &&
                        top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
                        if (should_use_dmub_lock(stream->link)) {
@@ -3247,7 +3254,6 @@ static void commit_planes_for_stream(struct dc *dc,
                                top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable(
                                                top_pipe_to_program->stream_res.tg);
                }
-       }
 
        if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
                if (dc->hwss.subvp_pipe_control_lock)
@@ -3466,7 +3472,7 @@ static void commit_planes_for_stream(struct dc *dc,
                dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
        }
 
-       if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) {
+       if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
                if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
                        top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
                                top_pipe_to_program->stream_res.tg,
@@ -3493,21 +3499,19 @@ static void commit_planes_for_stream(struct dc *dc,
                                top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable(
                                        top_pipe_to_program->stream_res.tg);
                }
-       }
 
-       if (update_type != UPDATE_TYPE_FAST) {
+       if (update_type != UPDATE_TYPE_FAST)
                dc->hwss.post_unlock_program_front_end(dc, context);
 
-               /* Since phantom pipe programming is moved to post_unlock_program_front_end,
-                * move the SubVP lock to after the phantom pipes have been setup
-                */
-               if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
-                       if (dc->hwss.subvp_pipe_control_lock)
-                               dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
-               } else {
-                       if (dc->hwss.subvp_pipe_control_lock)
-                               dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
-               }
+       /* Since phantom pipe programming is moved to post_unlock_program_front_end,
+        * move the SubVP lock to after the phantom pipes have been setup
+        */
+       if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
+               if (dc->hwss.subvp_pipe_control_lock)
+                       dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+       } else {
+               if (dc->hwss.subvp_pipe_control_lock)
+                       dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
        }
 
        // Fire manual trigger only when bottom plane is flipped
@@ -4292,7 +4296,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc)
            !dc->debug.dpia_debug.bits.disable_dpia)
                return true;
 
-       if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 &&
+       if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
            !dc->debug.dpia_debug.bits.disable_dpia)
                return true;
 
@@ -4340,6 +4344,7 @@ void dc_enable_dmub_outbox(struct dc *dc)
        struct dc_context *dc_ctx = dc->ctx;
 
        dmub_enable_outbox_notification(dc_ctx->dmub_srv);
+       DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__);
 }
 
 /**
index 9e51338441d079db5fb0cfba982405fb2f4f4080..66d2ae7aacf5eee8888d319c5b3b072a4a1ad938 100644 (file)
@@ -3372,7 +3372,7 @@ bool dc_link_setup_psr(struct dc_link *link,
                switch(link->ctx->asic_id.chip_family) {
                case FAMILY_YELLOW_CARP:
                case AMDGPU_FAMILY_GC_10_3_6:
-               case AMDGPU_FAMILY_GC_11_0_2:
+               case AMDGPU_FAMILY_GC_11_0_1:
                        if(!dc->debug.disable_z10)
                                psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false;
                        break;
index ffc0f1c0ea93b524c6b62bc5399946942c3331fa..7dbab15bfa68fc0d8cd1ccfc1e242d7901803959 100644 (file)
@@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
                if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev))
                        dc_version = DCN_VERSION_3_21;
                break;
-       case AMDGPU_FAMILY_GC_11_0_2:
+       case AMDGPU_FAMILY_GC_11_0_1:
                dc_version = DCN_VERSION_3_14;
                break;
        default:
index 8e1e40083ec8372113d0a8a79e5137a66205a874..5908b60db313964c9a888b470926a5cc1d478a4c 100644 (file)
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.196"
+#define DC_VER "3.2.198"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -213,6 +213,7 @@ struct dc_caps {
        uint32_t cache_num_ways;
        uint16_t subvp_fw_processing_delay_us;
        uint16_t subvp_prefetch_end_to_mall_start_us;
+       uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
        uint16_t subvp_pstate_allow_width_us;
        uint16_t subvp_vertical_int_margin_us;
        bool seamless_odm;
@@ -352,6 +353,7 @@ struct dc_config {
        bool use_pipe_ctx_sync_logic;
        bool ignore_dpref_ss;
        bool enable_mipi_converter_optimization;
+       bool use_default_clock_table;
 };
 
 enum visual_confirm {
@@ -609,6 +611,7 @@ struct dc_bounding_box_overrides {
        int percent_of_ideal_drambw;
        int dram_clock_change_latency_ns;
        int dummy_clock_change_latency_ns;
+       int fclk_clock_change_latency_ns;
        /* This forces a hard min on the DCFCLK we use
         * for DML.  Unlike the debug option for forcing
         * DCFCLK, this override affects watermark calculations
@@ -751,6 +754,7 @@ struct dc_debug_options {
        uint32_t mst_start_top_delay;
        uint8_t psr_power_use_phy_fsm;
        enum dml_hostvm_override_opts dml_hostvm_override;
+       bool dml_disallow_alternate_prefetch_modes;
        bool use_legacy_soc_bb_mechanism;
        bool exit_idle_opt_for_cursor_updates;
        bool enable_single_display_2to1_odm_policy;
index 2d61c2a91cee269642bf7500b15bb9731518bd9e..09b304507badb6bc5ec6b800e53a771888edeb61 100644 (file)
@@ -29,6 +29,7 @@
 #include "dm_helpers.h"
 #include "dc_hw_types.h"
 #include "core_types.h"
+#include "../basics/conversion.h"
 
 #define CTX dc_dmub_srv->ctx
 #define DC_LOGGER CTX->logger
@@ -275,8 +276,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
        union dmub_rb_cmd cmd = { 0 };
 
        cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
-       // TODO: Uncomment once FW headers are promoted
-       //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
+       cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
        cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;
 
        cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
@@ -601,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
                        &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
        struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
        struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+       uint32_t out_num, out_den;
 
        pipe_data->mode = SUBVP;
        pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
@@ -612,6 +613,16 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
                        main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
        pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
        pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx;
+       pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param;
+
+       /* Calculate the scaling factor from the src and dst height.
+        * e.g. If 3840x2160 is being downscaled to 1920x1080, the scaling factor is 1/2.
+        * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor"
+        */
+       reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den);
+       // TODO: Uncomment below lines once DMCUB include headers are promoted
+       //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num;
+       //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den;
 
        // Prefetch lines is equal to VACTIVE + BP + VSYNC
        pipe_data->pipe_config.subvp_data.prefetch_lines =
index a0af0f6afeef858fcbc74085708c608b4e2026e9..9544abf75e846eab97013154b00e8437be8a399f 100644 (file)
@@ -344,6 +344,7 @@ enum dc_detect_reason {
        DETECT_REASON_HPDRX,
        DETECT_REASON_FALLBACK,
        DETECT_REASON_RETRAIN,
+       DETECT_REASON_TDR,
 };
 
 bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason);
index 213de8cabfadb39e48a36037ab89e3cde03afdbc..165392380842adbfb8d01f5bb9dbf41c7d551eba 100644 (file)
@@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper (
                switch (pix_clk_params->color_depth) {
                case COLOR_DEPTH_101010:
                        actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2;
+                       actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
                        break;
                case COLOR_DEPTH_121212:
                        actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2;
+                       actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10;
                        break;
                case COLOR_DEPTH_161616:
                        actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
index d4a6504dfe0004865a37b331ff113b13e5a7bc17..db7ca4b0cdb9dd4165f9b8d1d53e9be2608ccc89 100644 (file)
@@ -361,8 +361,6 @@ void dpp1_cnv_setup (
                select = INPUT_CSC_SELECT_ICSC;
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-               pixel_format = 22;
-               break;
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
                pixel_format = 26; /* ARGB16161616_UNORM */
                break;
index b54c1240032377b6036347af961044b2e3d44935..564e061ccb589da01bf3e31bd8896521102712c1 100644 (file)
@@ -278,9 +278,6 @@ void hubp1_program_pixel_format(
                                SURFACE_PIXEL_FORMAT, 10);
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-               REG_UPDATE(DCSURF_SURFACE_CONFIG,
-                               SURFACE_PIXEL_FORMAT, 22);
-               break;
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
                REG_UPDATE(DCSURF_SURFACE_CONFIG,
                                SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
index bed783747f169b522e7ff561204b8bf4d32cd4e9..5b5d952b2b8cd72d3c8c143d12c3cc6822211354 100644 (file)
@@ -110,6 +110,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
                 */
                if (pipe_ctx->top_pipe ||
                    !pipe_ctx->stream ||
+                   !pipe_ctx->plane_state ||
                    !tg->funcs->is_tg_enabled(tg))
                        continue;
 
index 769974375b4b350f33ca19649aa89eb2161ba3a8..8e9384094f6d6b3d8482983baa3440c7d119d49f 100644 (file)
@@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
        while (tmp_mpcc != NULL) {
                if (tmp_mpcc->dpp_id == dpp_id)
                        return tmp_mpcc;
+
+               /* avoid circular linked list */
+               ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+               if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+                       break;
+
                tmp_mpcc = tmp_mpcc->mpcc_bot;
        }
        return NULL;
index e1a9a45b03b65e32eb824e75cfb761e457953888..3fc300cd1ce9516ab21aa630161e8d5e6e4b7e95 100644 (file)
@@ -465,6 +465,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable)
                                OTG_CLOCK_ON, 1,
                                1, 1000);
        } else  {
+
+               //last chance to clear underflow; otherwise it will persist because the clock is off.
+               if (optc->funcs->is_optc_underflow_occurred(optc) == true)
+                       optc->funcs->clear_optc_underflow(optc);
+
                REG_UPDATE_2(OTG_CLOCK_CONTROL,
                                OTG_CLOCK_GATE_DIS, 0,
                                OTG_CLOCK_EN, 0);
index ea1f14af0db7565fdef5f057fcfdeaf26192d026..eaa7032f0f1a3c11f71e99d5dfc1526f8861eb94 100644 (file)
@@ -166,8 +166,6 @@ static void dpp2_cnv_setup (
                select = DCN2_ICSC_SELECT_ICSC_A;
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-               pixel_format = 22;
-               break;
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
                pixel_format = 26; /* ARGB16161616_UNORM */
                break;
index 936af65381ef725fc433ad36df87be5626895a5e..9570c2118ccc73ae4ce3ffc32f7064c31cfa49a1 100644 (file)
@@ -463,9 +463,6 @@ void hubp2_program_pixel_format(
                                SURFACE_PIXEL_FORMAT, 10);
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-               REG_UPDATE(DCSURF_SURFACE_CONFIG,
-                               SURFACE_PIXEL_FORMAT, 22);
-               break;
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/
                REG_UPDATE(DCSURF_SURFACE_CONFIG,
                                SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
index 3d307dd58e9af70877f296db247b3787b3ce9676..116f67a0b989deb45eca4f1114b509df8dcc6371 100644 (file)
@@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
        while (tmp_mpcc != NULL) {
                if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id)
                        return tmp_mpcc;
+
+               /* avoid circular linked list */
+               ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot);
+               if (tmp_mpcc == tmp_mpcc->mpcc_bot)
+                       break;
+
                tmp_mpcc = tmp_mpcc->mpcc_bot;
        }
        return NULL;
index c5e200d09038fba2cf7cfc1eb3ceba438ed33fa9..5752271f22dfedda223a7feabdbaeb0b37047505 100644 (file)
@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp(
 void dcn21_dchvm_init(struct hubbub *hubbub)
 {
        struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
-       uint32_t riommu_active;
+       uint32_t riommu_active, prefetch_done;
        int i;
 
+       REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done);
+
+       if (prefetch_done) {
+               hubbub->riommu_active = true;
+               return;
+       }
        //Init DCHVM block
        REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
 
index 77b00f86c2165d0d30fc12cb1f12eff7cc8ea89c..4a668d6563dfd6aff3dd329d0bcb27f36bf6c9a3 100644 (file)
@@ -244,8 +244,6 @@ void dpp3_cnv_setup (
                select = INPUT_CSC_SELECT_ICSC;
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-               pixel_format = 22;
-               break;
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
                pixel_format = 26; /* ARGB16161616_UNORM */
                break;
index 6a4dcafb9bba5c3c9f0cb87a2b001c1edcd9c010..dc3e8df706b347a435c77165271c30c5119d101c 100644 (file)
@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr(
                        VMID, address->vmid);
 
        if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
-               REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1);
+               REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
                REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
 
        } else {
index 0a67f8a5656decee3fdff57a9d9ad446a8874424..d97076648acba46ea4c53ddb170a59ff43601d7d 100644 (file)
@@ -372,7 +372,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
        int afmt_inst;
 
        /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
-       if (eng_id <= ENGINE_ID_DIGE) {
+       if (eng_id <= ENGINE_ID_DIGB) {
                vpg_inst = eng_id;
                afmt_inst = eng_id;
        } else
index 7c77c71591a08219341a3134b94f0f7606ceed94..82c3b3ac1f0d01459e18cbb27218542c4283f19e 100644 (file)
        SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, AIP_ENABLE, mask_sh),\
        SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_SDP_AUDIO_CONTROL0, ACM_ENABLE, mask_sh),\
        SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_ENABLE, mask_sh),\
-       SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh)
+       SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_VID_CRC_CONTROL, CRC_CONT_MODE_ENABLE, mask_sh),\
+       SE_SF(DP_SYM32_ENC0_DP_SYM32_ENC_HBLANK_CONTROL, HBLANK_MINIMUM_SYMBOL_WIDTH, mask_sh)
 
 
 #define DCN3_1_HPO_DP_STREAM_ENC_REG_FIELD_LIST(type) \
index 468a893ff7854e451c1588c8b579ceabc0914269..aedff18aff563328b9791d55c984dd4ea5609ce0 100644 (file)
@@ -2153,7 +2153,7 @@ static bool dcn31_resource_construct(
                pool->base.usb4_dpia_count = 4;
        }
 
-       if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2)
+       if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1)
                pool->base.usb4_dpia_count = 4;
 
        /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
index 41f8ec99da6b386800e5bc1845e13d0c9796f1c2..901436591ed45c29556c2f447ac22c0632128519 100644 (file)
@@ -32,7 +32,6 @@
        container_of(pool, struct dcn31_resource_pool, base)
 
 extern struct _vcs_dpi_ip_params_st dcn3_1_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc;
 
 struct dcn31_resource_pool {
        struct resource_pool base;
index e3b5a95e03b19ace16176db0a5a9269e31ba0a12..702c28c2560eb2d7e73c8cec30aee7b14c256434 100644 (file)
 DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \
                dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
 
-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2
-endif
-endif
-
 AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
index 755c715ad8dceebaf61bdf6c03f2107d59cdcd1f..39931d48f3851cebb7c18a9a73f9ba05f6b00ae1 100644 (file)
@@ -343,7 +343,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
 {
        struct dc_stream_state *stream = pipe_ctx->stream;
        unsigned int odm_combine_factor = 0;
+       struct dc *dc = pipe_ctx->stream->ctx->dc;
+       bool two_pix_per_container = false;
 
+       two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
        odm_combine_factor = get_odm_config(pipe_ctx, NULL);
 
        if (is_dp_128b_132b_signal(pipe_ctx)) {
@@ -355,16 +358,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
                else
                        *k2_div = PIXEL_RATE_DIV_BY_4;
        } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
-               if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+               if (two_pix_per_container) {
                        *k1_div = PIXEL_RATE_DIV_BY_1;
                        *k2_div = PIXEL_RATE_DIV_BY_2;
-               } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) {
-                       *k1_div = PIXEL_RATE_DIV_BY_2;
-                       *k2_div = PIXEL_RATE_DIV_BY_2;
                } else {
-                       if (odm_combine_factor == 1)
-                               *k2_div = PIXEL_RATE_DIV_BY_4;
-                       else if (odm_combine_factor == 2)
+                       *k1_div = PIXEL_RATE_DIV_BY_1;
+                       *k2_div = PIXEL_RATE_DIV_BY_4;
+                       if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy)
                                *k2_div = PIXEL_RATE_DIV_BY_2;
                }
        }
@@ -374,3 +374,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
 
        return odm_combine_factor;
 }
+
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+{ /* Choose 1 or 2 pixels per cycle and pass it to the stream encoder's set_input_mode hook. */
+       uint32_t pix_per_cycle = 1; /* stays 1 unless a condition below raises it to 2 */
+       uint32_t odm_combine_factor = 1;
+
+       if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
+               return; /* no stream or no stream encoder: nothing to program */
+
+       odm_combine_factor = get_odm_config(pipe_ctx, NULL);
+       if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1
+               || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+               pix_per_cycle = 2; /* any one of the three checks above is sufficient */
+
+       if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode) /* skipped when the encoder has no such hook */
+               pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+                               pix_per_cycle);
+}
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{ /* Returns true only when all three conditions tested below hold; false otherwise. */
+       struct dc *dc = pipe_ctx->stream->ctx->dc; /* dereferenced unchecked: pipe_ctx and pipe_ctx->stream must be non-NULL */
+
+       if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) &&
+               dc->debug.enable_dp_dig_pixel_rate_div_policy) /* debug option gates the policy */
+               return true;
+       return false;
+}
index be0f5e4d48e13b1fd5d794cb651c759a45aa6ca0..d014580592aca6aa8286beeb42e0d7a70f5e5211 100644 (file)
@@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
 
 unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
 
+void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+
+bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
 #endif /* __DC_HWSS_DCN314_H__ */
index b9debeb081fdf1a1352c5b40d822756dcd525767..fcf67eb3478f07e60e361bdaaee35098c4ad145f 100644 (file)
@@ -145,6 +145,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
        .set_shaper_3dlut = dcn20_set_shaper_3dlut,
        .setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
        .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values,
+       .set_pixels_per_cycle = dcn314_set_pixels_per_cycle,
+       .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy,
 };
 
 void dcn314_hw_sequencer_construct(struct dc *dc)
index 63861cdfb09f2be608d02214c9359cf6fa2112a2..85f32206a7662f55dbef05855b0854f10859ed3a 100644 (file)
@@ -70,6 +70,7 @@
 #include "dce110/dce110_resource.h"
 #include "dml/display_mode_vba.h"
 #include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn314/dcn314_fpu.h"
 #include "dcn314/dcn314_dccg.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn31/dcn31_panel_cntl.h"
@@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C
 
 #define DC_LOGGER_INIT(logger)
 
-#define DCN3_14_DEFAULT_DET_SIZE 384
-#define DCN3_14_MAX_DET_SIZE 384
-#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
-#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
-struct _vcs_dpi_ip_params_st dcn3_14_ip = {
-       .VBlankNomDefaultUS = 668,
-       .gpuvm_enable = 1,
-       .gpuvm_max_page_table_levels = 1,
-       .hostvm_enable = 1,
-       .hostvm_max_page_table_levels = 2,
-       .rob_buffer_size_kbytes = 64,
-       .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
-       .config_return_buffer_size_in_kbytes = 1792,
-       .compressed_buffer_segment_size_in_kbytes = 64,
-       .meta_fifo_size_in_kentries = 32,
-       .zero_size_buffer_entries = 512,
-       .compbuf_reserved_space_64b = 256,
-       .compbuf_reserved_space_zs = 64,
-       .dpp_output_buffer_pixels = 2560,
-       .opp_output_buffer_lines = 1,
-       .pixel_chunk_size_kbytes = 8,
-       .meta_chunk_size_kbytes = 2,
-       .min_meta_chunk_size_bytes = 256,
-       .writeback_chunk_size_kbytes = 8,
-       .ptoi_supported = false,
-       .num_dsc = 4,
-       .maximum_dsc_bits_per_component = 10,
-       .dsc422_native_support = false,
-       .is_line_buffer_bpp_fixed = true,
-       .line_buffer_fixed_bpp = 48,
-       .line_buffer_size_bits = 789504,
-       .max_line_buffer_lines = 12,
-       .writeback_interface_buffer_size_kbytes = 90,
-       .max_num_dpp = 4,
-       .max_num_otg = 4,
-       .max_num_hdmi_frl_outputs = 1,
-       .max_num_wb = 1,
-       .max_dchub_pscl_bw_pix_per_clk = 4,
-       .max_pscl_lb_bw_pix_per_clk = 2,
-       .max_lb_vscl_bw_pix_per_clk = 4,
-       .max_vscl_hscl_bw_pix_per_clk = 4,
-       .max_hscl_ratio = 6,
-       .max_vscl_ratio = 6,
-       .max_hscl_taps = 8,
-       .max_vscl_taps = 8,
-       .dpte_buffer_size_in_pte_reqs_luma = 64,
-       .dpte_buffer_size_in_pte_reqs_chroma = 34,
-       .dispclk_ramp_margin_percent = 1,
-       .max_inter_dcn_tile_repeaters = 8,
-       .cursor_buffer_size = 16,
-       .cursor_chunk_size = 2,
-       .writeback_line_buffer_buffer_size = 0,
-       .writeback_min_hscl_ratio = 1,
-       .writeback_min_vscl_ratio = 1,
-       .writeback_max_hscl_ratio = 1,
-       .writeback_max_vscl_ratio = 1,
-       .writeback_max_hscl_taps = 1,
-       .writeback_max_vscl_taps = 1,
-       .dppclk_delay_subtotal = 46,
-       .dppclk_delay_scl = 50,
-       .dppclk_delay_scl_lb_only = 16,
-       .dppclk_delay_cnvc_formatter = 27,
-       .dppclk_delay_cnvc_cursor = 6,
-       .dispclk_delay_subtotal = 119,
-       .dynamic_metadata_vm_enabled = false,
-       .odm_combine_4to1_supported = false,
-       .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
-               /*TODO: correct dispclk/dppclk voltage level determination*/
-       .clock_limits = {
-               {
-                       .state = 0,
-                       .dispclk_mhz = 1200.0,
-                       .dppclk_mhz = 1200.0,
-                       .phyclk_mhz = 600.0,
-                       .phyclk_d18_mhz = 667.0,
-                       .dscclk_mhz = 186.0,
-                       .dtbclk_mhz = 625.0,
-               },
-               {
-                       .state = 1,
-                       .dispclk_mhz = 1200.0,
-                       .dppclk_mhz = 1200.0,
-                       .phyclk_mhz = 810.0,
-                       .phyclk_d18_mhz = 667.0,
-                       .dscclk_mhz = 209.0,
-                       .dtbclk_mhz = 625.0,
-               },
-               {
-                       .state = 2,
-                       .dispclk_mhz = 1200.0,
-                       .dppclk_mhz = 1200.0,
-                       .phyclk_mhz = 810.0,
-                       .phyclk_d18_mhz = 667.0,
-                       .dscclk_mhz = 209.0,
-                       .dtbclk_mhz = 625.0,
-               },
-               {
-                       .state = 3,
-                       .dispclk_mhz = 1200.0,
-                       .dppclk_mhz = 1200.0,
-                       .phyclk_mhz = 810.0,
-                       .phyclk_d18_mhz = 667.0,
-                       .dscclk_mhz = 371.0,
-                       .dtbclk_mhz = 625.0,
-               },
-               {
-                       .state = 4,
-                       .dispclk_mhz = 1200.0,
-                       .dppclk_mhz = 1200.0,
-                       .phyclk_mhz = 810.0,
-                       .phyclk_d18_mhz = 667.0,
-                       .dscclk_mhz = 417.0,
-                       .dtbclk_mhz = 625.0,
-               },
-       },
-       .num_states = 5,
-       .sr_exit_time_us = 9.0,
-       .sr_enter_plus_exit_time_us = 11.0,
-       .sr_exit_z8_time_us = 442.0,
-       .sr_enter_plus_exit_z8_time_us = 560.0,
-       .writeback_latency_us = 12.0,
-       .dram_channel_width_bytes = 4,
-       .round_trip_ping_latency_dcfclk_cycles = 106,
-       .urgent_latency_pixel_data_only_us = 4.0,
-       .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
-       .urgent_latency_vm_data_only_us = 4.0,
-       .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
-       .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
-       .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
-       .pct_ideal_sdp_bw_after_urgent = 80.0,
-       .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
-       .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
-       .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
-       .max_avg_sdp_bw_use_normal_percent = 60.0,
-       .max_avg_dram_bw_use_normal_percent = 60.0,
-       .fabric_datapath_to_dcn_data_return_bytes = 32,
-       .return_bus_width_bytes = 64,
-       .downspread_percent = 0.38,
-       .dcn_downspread_percent = 0.5,
-       .gpuvm_min_page_size_bytes = 4096,
-       .hostvm_min_page_size_bytes = 4096,
-       .do_urgent_latency_adjustment = false,
-       .urgent_latency_adjustment_fabric_clock_component_us = 0,
-       .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
 enum dcn31_clk_src_array_id {
        DCN31_CLK_SRC_PLL0,
        DCN31_CLK_SRC_PLL1,
@@ -1402,7 +1254,7 @@ static struct stream_encoder *dcn314_stream_encoder_create(
        int afmt_inst;
 
        /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
-       if (eng_id <= ENGINE_ID_DIGF) {
+       if (eng_id < ENGINE_ID_DIGF) {
                vpg_inst = eng_id;
                afmt_inst = eng_id;
        } else
@@ -1447,7 +1299,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
         * VPG[8] -> HPO_DP[2]
         * VPG[9] -> HPO_DP[3]
         */
-       vpg_inst = hpo_dp_inst + 6;
+       //Uses offset index 5-8, but actually maps to vpg_inst 6-9
+       vpg_inst = hpo_dp_inst + 5;
 
        /* Mapping of APG register blocks to HPO DP block instance:
         * APG[0] -> HPO_DP[0]
@@ -1793,109 +1646,16 @@ static struct clock_source *dcn31_clock_source_create(
        return NULL;
 }
 
-static bool is_dual_plane(enum surface_pixel_format format)
-{
-       return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
-}
-
 static int dcn314_populate_dml_pipes_from_context(
        struct dc *dc, struct dc_state *context,
        display_e2e_pipe_params_st *pipes,
        bool fast_validate)
 {
-       int i, pipe_cnt;
-       struct resource_context *res_ctx = &context->res_ctx;
-       struct pipe_ctx *pipe;
-       bool upscaled = false;
-
-       dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
-       for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
-               struct dc_crtc_timing *timing;
-
-               if (!res_ctx->pipe_ctx[i].stream)
-                       continue;
-               pipe = &res_ctx->pipe_ctx[i];
-               timing = &pipe->stream->timing;
-
-               if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
-                       && pipe->stream->adjust.v_total_min > timing->v_total)
-                       pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
-
-               if (pipe->plane_state &&
-                               (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
-                               pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
-                       upscaled = true;
-
-               /*
-                * Immediate flip can be set dynamically after enabling the plane.
-                * We need to require support for immediate flip or underflow can be
-                * intermittently experienced depending on peak b/w requirements.
-                */
-               pipes[pipe_cnt].pipe.src.immediate_flip = true;
-
-               pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
-               pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
-               pipes[pipe_cnt].pipe.src.gpuvm = true;
-               pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-               pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
-               pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
-               pipes[pipe_cnt].pipe.src.dcc_rate = 3;
-               pipes[pipe_cnt].dout.dsc_input_bpc = 0;
-
-               if (pipes[pipe_cnt].dout.dsc_enable) {
-                       switch (timing->display_color_depth) {
-                       case COLOR_DEPTH_888:
-                               pipes[pipe_cnt].dout.dsc_input_bpc = 8;
-                               break;
-                       case COLOR_DEPTH_101010:
-                               pipes[pipe_cnt].dout.dsc_input_bpc = 10;
-                               break;
-                       case COLOR_DEPTH_121212:
-                               pipes[pipe_cnt].dout.dsc_input_bpc = 12;
-                               break;
-                       default:
-                               ASSERT(0);
-                               break;
-                       }
-               }
-
-               pipe_cnt++;
-       }
-       context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
-
-       dc->config.enable_4to1MPC = false;
-       if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
-               if (is_dual_plane(pipe->plane_state->format)
-                               && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
-                       dc->config.enable_4to1MPC = true;
-               } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
-                       /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
-                       context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
-                       pipes[0].pipe.src.unbounded_req_mode = true;
-               }
-       } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
-                       && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
-               context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
-       } else if (context->stream_count >= 3 && upscaled) {
-               context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
-       }
-
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-               if (!pipe->stream)
-                       continue;
+       int pipe_cnt; /* value produced by the _fpu helper below and returned to the caller */
 
-               if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
-                               pipe->stream->apply_seamless_boot_optimization) {
-
-                       if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
-                               context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
-                               break;
-                       }
-               }
-       }
+       DC_FP_START(); /* the _fpu helper is only called between DC_FP_START() and DC_FP_END() */
+       pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); /* all four arguments forwarded unchanged */
+       DC_FP_END(); /* closes the DC_FP_START() bracket before returning */
 
        return pipe_cnt;
 }
@@ -1906,88 +1666,9 @@ static struct dc_cap_funcs cap_funcs = {
 
 static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
 {
-       struct clk_limit_table *clk_table = &bw_params->clk_table;
-       struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp;
-       unsigned int i, closest_clk_lvl;
-       int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
-       int j;
-
-       // Default clock levels are used for diags, which may lead to overclocking.
-       if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
-               dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
-               dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
-
-               if (bw_params->num_channels > 0)
-                       dcn3_14_soc.num_chans = bw_params->num_channels;
-
-               ASSERT(dcn3_14_soc.num_chans);
-               ASSERT(clk_table->num_entries);
-
-               /* Prepass to find max clocks independent of voltage level. */
-               for (i = 0; i < clk_table->num_entries; ++i) {
-                       if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
-                               max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
-                       if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
-                               max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
-               }
-
-               for (i = 0; i < clk_table->num_entries; i++) {
-                       /* loop backwards*/
-                       for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
-                               if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
-                                       closest_clk_lvl = j;
-                                       break;
-                               }
-                       }
-                       if (clk_table->num_entries == 1) {
-                               /*smu gives one DPM level, let's take the highest one*/
-                               closest_clk_lvl = dcn3_14_soc.num_states - 1;
-                       }
-
-                       clock_tmp[i].state = i;
-
-                       /* Clocks dependent on voltage level. */
-                       clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
-                       if (clk_table->num_entries == 1 &&
-                               clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
-                               /*SMU fix not released yet*/
-                               clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
-                       }
-                       clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
-                       clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
-
-                       if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
-                               clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
-                       /* Clocks independent of voltage level. */
-                       clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
-                               dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
-                       clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
-                               dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
-                       clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-                       clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-                       clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-                       clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-                       clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
-               }
-               for (i = 0; i < clk_table->num_entries; i++)
-                       dcn3_14_soc.clock_limits[i] = clock_tmp[i];
-               if (clk_table->num_entries)
-                       dcn3_14_soc.num_states = clk_table->num_entries;
-       }
-
-       if (max_dispclk_mhz) {
-               dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
-               dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
-       }
-
-       if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
-               dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
-       else
-               dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+       DC_FP_START(); /* dcn314_update_bw_bounding_box_fpu() calls dc_assert_fp_enabled(), so wrap the call */
+       dcn314_update_bw_bounding_box_fpu(dc, bw_params);
+       DC_FP_END();
 }
 
 static struct resource_funcs dcn314_res_pool_funcs = {
index c41108847ce08ea843d3e001d038289cc52b42e5..0dd3153aa5c17aaca0bcdf8229a0e788acae102a 100644 (file)
@@ -29,6 +29,9 @@
 
 #include "core_types.h"
 
+extern struct _vcs_dpi_ip_params_st dcn3_14_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc;
+
 #define TO_DCN314_RES_POOL(pool)\
        container_of(pool, struct dcn314_resource_pool, base)
 
index 39929fa67a51020443f7887b1bd2790044be51ef..22849eaa6f243eb474789fd8a2874234bc1ea4cd 100644 (file)
@@ -32,7 +32,6 @@
        container_of(pool, struct dcn315_resource_pool, base)
 
 extern struct _vcs_dpi_ip_params_st dcn3_15_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_15_soc;
 
 struct dcn315_resource_pool {
        struct resource_pool base;
index 0dc5a6c13ae7d46f353a5dc7e31dee5cf51fcbe5..aba6d634131b41988f30b26b1f4166a089eca8d2 100644 (file)
@@ -32,7 +32,6 @@
        container_of(pool, struct dcn316_resource_pool, base)
 
 extern struct _vcs_dpi_ip_params_st dcn3_16_ip;
-extern struct _vcs_dpi_ip_params_st dcn3_16_soc;
 
 struct dcn316_resource_pool {
        struct resource_pool base;
index d38341f68b1721f786ea2ae017b73248abd71a62..ebd3945c71f1b6ba3fad7e4423b8bc510dd57a71 100644 (file)
@@ -250,6 +250,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
        uint32_t total_lines = 0;
        uint32_t lines_per_way = 0;
        uint32_t num_ways = 0;
+       uint32_t prev_addr_low = 0;
 
        for (i = 0; i < ctx->stream_count; i++) {
                stream = ctx->streams[i];
@@ -267,10 +268,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
                        plane = ctx->stream_status[i].plane_states[j];
 
                        // Calculate total surface size
-                       surface_size = plane->plane_size.surface_pitch *
+                       if (prev_addr_low != plane->address.grph.addr.u.low_part) {
+                               /* If the plane address differs from the previous FB, userspace allocated separate FBs. */
+                               surface_size += plane->plane_size.surface_pitch *
                                        plane->plane_size.surface_size.height *
                                        (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
 
+                               prev_addr_low = plane->address.grph.addr.u.low_part;
+                       } else {
+                               /* We have the same FB for all the planes.
+                                * Xorg always creates one giant FB that holds all surfaces,
+                                * so accounting for it once is sufficient.
+                                */
+                               continue;
+                       }
                        // Convert surface size + starting address to number of cache lines required
                        // (alignment accounted for)
                        cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
@@ -320,7 +331,10 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
 {
        union dmub_rb_cmd cmd;
-       uint8_t ways;
+       uint8_t ways, i;
+       int j;
+       bool stereo_in_use = false;
+       struct dc_plane_state *plane = NULL;
 
        if (!dc->ctx->dmub_srv)
                return false;
@@ -349,7 +363,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
                         * and configure HUBP's to fetch from MALL
                         */
                        ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
-                       if (ways <= dc->caps.cache_num_ways) {
+
+                       /* MALL not supported with Stereo3D. If any plane is using stereo,
+                        * don't try to enter MALL.
+                        */
+                       for (i = 0; i < dc->current_state->stream_count; i++) {
+                               for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+                                       plane = dc->current_state->stream_status[i].plane_states[j];
+
+                                       if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) {
+                                               stereo_in_use = true;
+                                               break;
+                                       }
+                               }
+                               if (stereo_in_use)
+                                       break;
+                       }
+                       if (ways <= dc->caps.cache_num_ways && !stereo_in_use) {
                                memset(&cmd, 0, sizeof(cmd));
                                cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
                                cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
@@ -683,9 +713,11 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
                        if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
                                        hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
                        } else {
+                               // MALL not supported with Stereo3D
                                hubp->funcs->hubp_update_mall_sel(hubp,
                                        num_ways <= dc->caps.cache_num_ways &&
-                                       pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0,
+                                       pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED &&
+                                       pipe->plane_state->address.type !=  PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0,
                                                        cache_cursor);
                        }
                }
index eff1f4e17689c64428d9a9a88c39cfba96d533f9..1fad7b48bd5beb51d42459856d72ee5385fd064e 100644 (file)
@@ -281,7 +281,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
                .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
                .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
                .enable_optc_clock = optc1_enable_optc_clock,
-               .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted
+               .set_drr = optc32_set_drr,
                .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
                .set_vtotal_min_max = optc3_set_vtotal_min_max,
                .set_static_screen_control = optc1_set_static_screen_control,
index 9a26d24b579f739c769aed46c508448508ca4e70..8b887b552f2c764a92816fe8eb4525c7eb7c6760 100644 (file)
@@ -867,7 +867,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                }
        },
        .use_max_lb = true,
-       .force_disable_subvp = true,
+       .force_disable_subvp = false,
        .exit_idle_opt_for_cursor_updates = true,
        .enable_single_display_2to1_odm_policy = true,
        .enable_dp_dig_pixel_rate_div_policy = 1,
@@ -2051,6 +2051,7 @@ static bool dcn32_resource_construct(
        dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64
        dc->caps.subvp_fw_processing_delay_us = 15;
        dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+       dc->caps.subvp_swath_height_margin_lines = 16;
        dc->caps.subvp_pstate_allow_width_us = 20;
        dc->caps.subvp_vertical_int_margin_us = 30;
 
index b3f8503cea9c593b5185c72ff4ecaaaea9bb6a5d..955f52e6064df67ac1b0f9c5831d785e7a0b153d 100644 (file)
@@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
                if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
                                pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
                        bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
-                       mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable;
+                       mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable;
 
                        // For bytes required in MALL, calculate based on number of MBlks required
                        num_mblks = (mall_region_pixels * bytes_per_pixel +
index 8157e40d2c7efb9a129f770b4395ef9e63b0e646..c8b7d6ff38f4fa1887aad87bb8cc6a4bb9bcf0cb 100644 (file)
@@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                }
        },
        .use_max_lb = true,
-       .force_disable_subvp = true,
+       .force_disable_subvp = false,
        .exit_idle_opt_for_cursor_updates = true,
        .enable_single_display_2to1_odm_policy = true,
        .enable_dp_dig_pixel_rate_div_policy = 1,
@@ -1662,8 +1662,9 @@ static bool dcn321_resource_construct(
        dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32
        dc->caps.subvp_fw_processing_delay_us = 15;
        dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+       dc->caps.subvp_swath_height_margin_lines = 16;
        dc->caps.subvp_pstate_allow_width_us = 20;
-
+       dc->caps.subvp_vertical_int_margin_us = 30;
        dc->caps.max_slave_planes = 1;
        dc->caps.max_slave_yuv_planes = 1;
        dc->caps.max_slave_rgb_planes = 1;
index 359f6e9a1da04fd2207853f9f548b2c576316bf1..86a3b5bfd699b2c9b5a15ea048be1b2fbe6f9990 100644 (file)
@@ -61,7 +61,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
@@ -71,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag)
@@ -82,7 +82,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
@@ -131,6 +130,7 @@ DML += dcn321/dcn321_fpu.o
 DML += dcn301/dcn301_fpu.o
 DML += dcn302/dcn302_fpu.o
 DML += dcn303/dcn303_fpu.o
+DML += dcn314/dcn314_fpu.o
 DML += dsc/rc_calc_fpu.o
 DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
 endif
index ca44df4fca747bc3cf16b0a1e17a2e9079516792..d34e0f1314d9141c4b26fe36f7f1b7d7850a6159 100644 (file)
@@ -30,6 +30,7 @@
 #include "dchubbub.h"
 #include "dcn20/dcn20_resource.h"
 #include "dcn21/dcn21_resource.h"
+#include "clk_mgr/dcn21/rn_clk_mgr.h"
 
 #include "dcn20_fpu.h"
 
index 7ef66e511ec8ef428c09f0cca9238f7b7f6ff7e6..d211cf6d234c7c46bfa342475dab74e1b54ddf88 100644 (file)
@@ -26,6 +26,7 @@
 #include "clk_mgr.h"
 #include "dcn20/dcn20_resource.h"
 #include "dcn301/dcn301_resource.h"
+#include "clk_mgr/dcn301/vg_clk_mgr.h"
 
 #include "dml/dcn20/dcn20_fpu.h"
 #include "dcn301_fpu.h"
index e36cfa5985ea9c6e7b0a7156d3267cbdc52d5672..149a1b17cdf3f34fa26c13fd78ab253da3630dd3 100644 (file)
@@ -25,6 +25,9 @@
 
 #include "resource.h"
 #include "clk_mgr.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn315/dcn315_resource.h"
+#include "dcn316/dcn316_resource.h"
 
 #include "dml/dcn20/dcn20_fpu.h"
 #include "dcn31_fpu.h"
@@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = {
        .dcc_supported = true,
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
                /*TODO: correct dispclk/dppclk voltage level determination*/
        .clock_limits = {
                {
@@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = {
        .dcc_supported = true,
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
        .sr_exit_time_us = 9.0,
        .sr_enter_plus_exit_time_us = 11.0,
        .sr_exit_z8_time_us = 50.0,
@@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = {
        .dcc_supported = true,
 };
 
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
                /*TODO: correct dispclk/dppclk voltage level determination*/
        .clock_limits = {
                {
index 3fab19134480d3784dc237b14349bcb0a1377d19..d63b4209b14c080538fb2905129e18354f163dba 100644 (file)
@@ -26,7 +26,7 @@
 #include "dc.h"
 #include "dc_link.h"
 #include "../display_mode_lib.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
 #include "display_mode_vba_31.h"
 #include "../dml_inline_defs.h"
 
index 66b82e4f05c6e8127c11d8fab35b9e6e9f787444..35d10b4d018bf0507a59cb0089125eed5e0b272b 100644 (file)
@@ -27,7 +27,7 @@
 #include "../display_mode_vba.h"
 #include "../dml_inline_defs.h"
 #include "display_rq_dlg_calc_31.h"
-#include "dml/dcn30/display_mode_vba_30.h"
+#include "../dcn30/display_mode_vba_30.h"
 
 static bool is_dual_plane(enum source_format_class source_format)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
new file mode 100644 (file)
index 0000000..34a5d0f
--- /dev/null
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr.h"
+#include "resource.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn314_fpu.h"
+#include "dml/dcn20/dcn20_fpu.h"
+#include "dml/display_mode_vba.h"
+
+struct _vcs_dpi_ip_params_st dcn3_14_ip = { /* IP parameters passed to dml_init_instance() by dcn314_update_bw_bounding_box_fpu() */
+       .VBlankNomDefaultUS = 668,
+       .gpuvm_enable = 1,
+       .gpuvm_max_page_table_levels = 1,
+       .hostvm_enable = 1,
+       .hostvm_max_page_table_levels = 2,
+       .rob_buffer_size_kbytes = 64,
+       .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE,
+       .config_return_buffer_size_in_kbytes = 1792,
+       .compressed_buffer_segment_size_in_kbytes = 64,
+       .meta_fifo_size_in_kentries = 32,
+       .zero_size_buffer_entries = 512,
+       .compbuf_reserved_space_64b = 256,
+       .compbuf_reserved_space_zs = 64,
+       .dpp_output_buffer_pixels = 2560,
+       .opp_output_buffer_lines = 1,
+       .pixel_chunk_size_kbytes = 8,
+       .meta_chunk_size_kbytes = 2,
+       .min_meta_chunk_size_bytes = 256,
+       .writeback_chunk_size_kbytes = 8,
+       .ptoi_supported = false,
+       .num_dsc = 4,
+       .maximum_dsc_bits_per_component = 10,
+       .dsc422_native_support = false,
+       .is_line_buffer_bpp_fixed = true,
+       .line_buffer_fixed_bpp = 48,
+       .line_buffer_size_bits = 789504,
+       .max_line_buffer_lines = 12,
+       .writeback_interface_buffer_size_kbytes = 90,
+       .max_num_dpp = 4, /* default; may be replaced by dc->res_pool->pipe_count at runtime */
+       .max_num_otg = 4, /* default; may be replaced by res_cap->num_timing_generator at runtime */
+       .max_num_hdmi_frl_outputs = 1,
+       .max_num_wb = 1,
+       .max_dchub_pscl_bw_pix_per_clk = 4,
+       .max_pscl_lb_bw_pix_per_clk = 2,
+       .max_lb_vscl_bw_pix_per_clk = 4,
+       .max_vscl_hscl_bw_pix_per_clk = 4,
+       .max_hscl_ratio = 6,
+       .max_vscl_ratio = 6,
+       .max_hscl_taps = 8,
+       .max_vscl_taps = 8,
+       .dpte_buffer_size_in_pte_reqs_luma = 64,
+       .dpte_buffer_size_in_pte_reqs_chroma = 34,
+       .dispclk_ramp_margin_percent = 1,
+       .max_inter_dcn_tile_repeaters = 8,
+       .cursor_buffer_size = 16,
+       .cursor_chunk_size = 2,
+       .writeback_line_buffer_buffer_size = 0,
+       .writeback_min_hscl_ratio = 1,
+       .writeback_min_vscl_ratio = 1,
+       .writeback_max_hscl_ratio = 1,
+       .writeback_max_vscl_ratio = 1,
+       .writeback_max_hscl_taps = 1,
+       .writeback_max_vscl_taps = 1,
+       .dppclk_delay_subtotal = 46,
+       .dppclk_delay_scl = 50,
+       .dppclk_delay_scl_lb_only = 16,
+       .dppclk_delay_cnvc_formatter = 27,
+       .dppclk_delay_cnvc_cursor = 6,
+       .dispclk_delay_subtotal = 119,
+       .dynamic_metadata_vm_enabled = false,
+       .odm_combine_4to1_supported = false,
+       .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { /* SoC bounding box passed to dml_init_instance(); updated by dcn314_update_bw_bounding_box_fpu() */
+               /* TODO: correct dispclk/dppclk voltage level determination */
+       .clock_limits = { /* dcfclk/fabricclk/socclk/dram_speed are not set here; the bounding-box update writes them from the clock table */
+               {
+                       .state = 0,
+                       .dispclk_mhz = 1200.0,
+                       .dppclk_mhz = 1200.0,
+                       .phyclk_mhz = 600.0,
+                       .phyclk_d18_mhz = 667.0,
+                       .dscclk_mhz = 186.0,
+                       .dtbclk_mhz = 600.0,
+               },
+               {
+                       .state = 1,
+                       .dispclk_mhz = 1200.0,
+                       .dppclk_mhz = 1200.0,
+                       .phyclk_mhz = 810.0,
+                       .phyclk_d18_mhz = 667.0,
+                       .dscclk_mhz = 209.0,
+                       .dtbclk_mhz = 600.0,
+               },
+               {
+                       .state = 2,
+                       .dispclk_mhz = 1200.0,
+                       .dppclk_mhz = 1200.0,
+                       .phyclk_mhz = 810.0,
+                       .phyclk_d18_mhz = 667.0,
+                       .dscclk_mhz = 209.0,
+                       .dtbclk_mhz = 600.0,
+               },
+               {
+                       .state = 3,
+                       .dispclk_mhz = 1200.0,
+                       .dppclk_mhz = 1200.0,
+                       .phyclk_mhz = 810.0,
+                       .phyclk_d18_mhz = 667.0,
+                       .dscclk_mhz = 371.0,
+                       .dtbclk_mhz = 600.0,
+               },
+               {
+                       .state = 4,
+                       .dispclk_mhz = 1200.0,
+                       .dppclk_mhz = 1200.0,
+                       .phyclk_mhz = 810.0,
+                       .phyclk_d18_mhz = 667.0,
+                       .dscclk_mhz = 417.0,
+                       .dtbclk_mhz = 600.0,
+               },
+       },
+       .num_states = 5, /* number of clock_limits entries above; may be replaced by clk_table->num_entries at runtime */
+       .sr_exit_time_us = 9.0,
+       .sr_enter_plus_exit_time_us = 11.0,
+       .sr_exit_z8_time_us = 442.0,
+       .sr_enter_plus_exit_z8_time_us = 560.0,
+       .writeback_latency_us = 12.0,
+       .dram_channel_width_bytes = 4,
+       .round_trip_ping_latency_dcfclk_cycles = 106,
+       .urgent_latency_pixel_data_only_us = 4.0,
+       .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+       .urgent_latency_vm_data_only_us = 4.0,
+       .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+       .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+       .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+       .pct_ideal_sdp_bw_after_urgent = 80.0,
+       .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+       .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+       .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+       .max_avg_sdp_bw_use_normal_percent = 60.0,
+       .max_avg_dram_bw_use_normal_percent = 60.0,
+       .fabric_datapath_to_dcn_data_return_bytes = 32,
+       .return_bus_width_bytes = 64,
+       .downspread_percent = 0.38,
+       .dcn_downspread_percent = 0.5,
+       .gpuvm_min_page_size_bytes = 4096,
+       .hostvm_min_page_size_bytes = 4096,
+       .do_urgent_latency_adjustment = false,
+       .urgent_latency_adjustment_fabric_clock_component_us = 0,
+       .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+/* Update dcn3_14_ip/dcn3_14_soc from bw_params (unless defaults are kept), then (re)initialize dc->dml. */
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+       struct clk_limit_table *clk_table = &bw_params->clk_table;
+       struct _vcs_dpi_voltage_scaling_st *clock_limits =
+               dcn3_14_soc.clock_limits; /* NOTE(review): points at the live table, so the writes below land in place */
+       unsigned int i, closest_clk_lvl;
+       int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+       int j;
+
+       dc_assert_fp_enabled();
+
+       // Default clock levels are used for diags (which may lead to overclocking) or when use_default_clock_table is set.
+       if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) {
+
+               dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+               dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count;
+
+               if (bw_params->num_channels > 0)
+                       dcn3_14_soc.num_chans = bw_params->num_channels;
+
+               ASSERT(dcn3_14_soc.num_chans);
+               ASSERT(clk_table->num_entries);
+
+               /* Prepass to find max clocks independent of voltage level. */
+               for (i = 0; i < clk_table->num_entries; ++i) {
+                       if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+                               max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+                       if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+                               max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+               }
+
+               for (i = 0; i < clk_table->num_entries; i++) {
+                       /* Walk states from highest to lowest; take the first whose dcfclk does not exceed this entry's (0 if none). */
+                       for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) {
+                               if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+                                       closest_clk_lvl = j;
+                                       break;
+                               }
+                       }
+                       if (clk_table->num_entries == 1) {
+                               /* SMU gives only one DPM level, so take the highest state. */
+                               closest_clk_lvl = dcn3_14_soc.num_states - 1;
+                       }
+
+                       clock_limits[i].state = i;
+
+                       /* Clocks dependent on voltage level. */
+                       clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+                       if (clk_table->num_entries == 1 &&
+                               clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+                               /* SMU fix not released yet: do not go below the highest state's dcfclk. */
+                               clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+                       }
+                       clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+                       clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+                       if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio)
+                               clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+                       /* Clocks independent of voltage level: use the table-wide max, else the closest state's value. */
+                       clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+                               dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+                       clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+                               dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+                       clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+                       clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+                       clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+                       clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+                       clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+               }
+               for (i = 0; i < clk_table->num_entries; i++) /* NOTE(review): self-assignment, clock_limits is dcn3_14_soc.clock_limits */
+                       dcn3_14_soc.clock_limits[i] = clock_limits[i];
+               if (clk_table->num_entries) {
+                       dcn3_14_soc.num_states = clk_table->num_entries;
+               }
+       }
+
+       if (max_dispclk_mhz) { /* still 0 if the table update was skipped or no entry reported a dispclk */
+               dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+               dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+       }
+
+       if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+               dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31);
+       else
+               dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA);
+}
+/* True for formats at or above SURFACE_PIXEL_FORMAT_VIDEO_BEGIN and for SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA. */
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+       return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+/*
+ * Build the DML pipe parameter array for @context on DCN 3.1.4.
+ *
+ * dcn20_populate_dml_pipes_from_context() fills @pipes first; this function
+ * then overrides per-pipe source/destination/output fields for every pipe_ctx
+ * that has a stream, picks the DET buffer size and the 4:1 MPC setting, and
+ * applies the 2:1 ODM combine policy for seamless-boot eDP streams.
+ *
+ * Side effects: writes dc->config.enable_4to1MPC,
+ * context->bw_ctx.dml.ip.det_buffer_size_kbytes and, for a matching eDP
+ * stream, context->bw_ctx.dml.vba.ODMCombinePolicy.
+ *
+ * @fast_validate is only forwarded to the DCN 2.0 helper.
+ *
+ * Returns the number of pipe_ctx entries that have a stream attached.
+ */
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+                                              display_e2e_pipe_params_st *pipes,
+                                              bool fast_validate)
+{
+       int i, pipe_cnt;
+       struct resource_context *res_ctx = &context->res_ctx;
+       /* Last pipe_ctx seen with a stream; stays NULL when no pipe has one. */
+       struct pipe_ctx *pipe = NULL;
+       bool upscaled = false;
+
+       dc_assert_fp_enabled();
+
+       dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+       for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+               struct dc_crtc_timing *timing;
+
+               if (!res_ctx->pipe_ctx[i].stream)
+                       continue;
+               pipe = &res_ctx->pipe_ctx[i];
+               timing = &pipe->stream->timing;
+
+               /*
+                * With extended blank supported and a fixed v_total adjustment
+                * (min == max) larger than the base timing, use the adjusted
+                * v_total as the DML destination vtotal.
+                */
+               if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min
+                       && pipe->stream->adjust.v_total_min > timing->v_total)
+                       pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+               /* Any plane whose source rect is smaller than its destination rect counts as upscaled. */
+               if (pipe->plane_state &&
+                               (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+                               pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+                       upscaled = true;
+
+               /*
+                * Immediate flip can be set dynamically after enabling the plane.
+                * We need to require support for immediate flip or underflow can be
+                * intermittently experienced depending on peak b/w requirements.
+                */
+               pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+               pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+               pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+               pipes[pipe_cnt].pipe.src.gpuvm = true;
+               pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+               pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+               pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+               pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+               pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+
+               /* With DSC enabled, derive the DSC input bpc from the stream colour depth. */
+               if (pipes[pipe_cnt].dout.dsc_enable) {
+                       switch (timing->display_color_depth) {
+                       case COLOR_DEPTH_888:
+                               pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+                               break;
+                       case COLOR_DEPTH_101010:
+                               pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+                               break;
+                       case COLOR_DEPTH_121212:
+                               pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+                               break;
+                       default:
+                               ASSERT(0);
+                               break;
+                       }
+               }
+
+               pipe_cnt++;
+       }
+       context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE;
+
+       dc->config.enable_4to1MPC = false;
+       /* pipe_cnt == 1 means the loop above assigned pipe exactly once, so it is non-NULL here. */
+       if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+               if (is_dual_plane(pipe->plane_state->format)
+                               && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) {
+                       dc->config.enable_4to1MPC = true;
+               } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+                       /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
+                       context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+                       pipes[0].pipe.src.unbounded_req_mode = true;
+               }
+       } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count
+                       && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+               context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+       } else if (context->stream_count >= 3 && upscaled) {
+               context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+       }
+
+       /* Apply 2:1 ODM combine for the first seamless-boot eDP stream that requests it. */
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               /* Distinct name: avoids shadowing the function-scope 'pipe'. */
+               struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i];
+
+               if (!cur_pipe->stream)
+                       continue;
+
+               if (cur_pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine &&
+                               cur_pipe->stream->apply_seamless_boot_optimization) {
+
+                       if (cur_pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+                               context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+                               break;
+                       }
+               }
+       }
+
+       return pipe_cnt;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
new file mode 100644 (file)
index 0000000..d32c5bb
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN314_FPU_H__
+#define __DCN314_FPU_H__
+
+#define DCN3_14_DEFAULT_DET_SIZE 384 /* default for dml.ip.det_buffer_size_kbytes */
+#define DCN3_14_MAX_DET_SIZE 384 /* NOTE(review): presumably kbytes like the default - confirm */
+#define DCN3_14_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_14_CRB_SEGMENT_SIZE_KB 64
+
+void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
+                                              display_e2e_pipe_params_st *pipes,
+                                              bool fast_validate); /* returns the count of pipes that have a stream */
+
+#endif /* __DCN314_FPU_H__ */
index 66453546e24fe9b42a6966fe06f57e81dedb6d9c..8118cfc5b405672b84fe8cce9daa812b572277a0 100644 (file)
@@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
 
        // DML calculation for MALL region doesn't take into account FW delay
        // and required pstate allow width for multi-display cases
+       /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+        * to 2 swaths (i.e. 16 lines)
+        */
        phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
-                               pstate_width_fw_delay_lines;
+                               pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
 
        // For backporch of phantom pipe, use vstartup of the main pipe
        phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -490,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
                                                phantom_stream->timing.v_front_porch +
                                                phantom_stream->timing.v_sync_width +
                                                phantom_bp;
+       phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
 }
 
 /**
@@ -983,9 +987,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
         * DML favors voltage over p-state, but we're more interested in
         * supporting p-state over voltage. We can't support p-state in
         * prefetch mode > 0 so try capping the prefetch mode to start.
+        * Override present for testing.
         */
-       context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+       if (dc->debug.dml_disallow_alternate_prefetch_modes)
+               context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
                        dm_prefetch_support_uclk_fclk_and_stutter;
+       else
+               context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+                       dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
        *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
        /* This may adjust vlevel and maxMpcComb */
        if (*vlevel < context->bw_ctx.dml.soc.num_states)
@@ -1014,7 +1024,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
                         * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched
                         * enough to support MCLK switching.
                         */
-                       if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+                       if (*vlevel == context->bw_ctx.dml.soc.num_states &&
+                               context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
+                                       dm_prefetch_support_uclk_fclk_and_stutter) {
                                context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
                                                                dm_prefetch_support_stutter;
                                /* There are params (such as FabricClock) that need to be recalculated
@@ -1344,7 +1356,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
        int split[MAX_PIPES] = { 0 };
        bool merge[MAX_PIPES] = { false };
        bool newly_split[MAX_PIPES] = { false };
-       int pipe_cnt, i, pipe_idx, vlevel;
+       int pipe_cnt, i, pipe_idx;
+       int vlevel = context->bw_ctx.dml.soc.num_states;
        struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
 
        dc_assert_fp_enabled();
@@ -1373,17 +1386,22 @@ bool dcn32_internal_validate_bw(struct dc *dc,
                DC_FP_END();
        }
 
-       if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
-                       vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+       if (fast_validate ||
+                       (dc->debug.dml_disallow_alternate_prefetch_modes &&
+                       (vlevel == context->bw_ctx.dml.soc.num_states ||
+                               vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
                /*
-                * If mode is unsupported or there's still no p-state support then
-                * fall back to favoring voltage.
+                * If dml_disallow_alternate_prefetch_modes is false, then we have already
+                * tried alternate prefetch modes during full validation.
+                *
+                * If mode is unsupported or there is no p-state support, then
+                * fall back to favouring voltage.
                 *
-                * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if
-                * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
+                * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try
+                * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2)
                 */
                context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
-                               dm_prefetch_support_fclk_and_stutter;
+                       dm_prefetch_support_fclk_and_stutter;
 
                vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
 
@@ -2098,6 +2116,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
                                dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
                }
 
+               /*
+                * Apply the fclk change latency override (given in ns) when it is
+                * non-zero and differs from the current value. Divide by 1000.0,
+                * like the other latency overrides here, so that sub-microsecond
+                * parts of the override are not truncated by integer division.
+                */
+               if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
+                               != dc->bb_overrides.fclk_clock_change_latency_ns
+                               && dc->bb_overrides.fclk_clock_change_latency_ns) {
+                       dcn3_2_soc.fclk_change_latency_us =
+                               dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0;
+               }
+
                if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000)
                                != dc->bb_overrides.dummy_clock_change_latency_ns
                                && dc->bb_overrides.dummy_clock_change_latency_ns) {
index 890612db08dc4224bb4ac960ea5ba881fc3a9714..cb2025771646b916d6d0d23224889e6d9d3921a2 100644 (file)
@@ -221,7 +221,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                // VBA_DELTA
                // Calculate DET size, swath height
                dml32_CalculateSwathAndDETConfiguration(
-                               &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
                                mode_lib->vba.DETSizeOverride,
                                mode_lib->vba.UsesMALLForPStateChange,
                                mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -461,7 +460,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
        {
 
                dml32_CalculateVMRowAndSwath(
-                               &v->dummy_vars.dml32_CalculateVMRowAndSwath,
                                mode_lib->vba.NumberOfActiveSurfaces,
                                v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters,
                                v->SurfaceSizeInMALL,
@@ -757,9 +755,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k];
                        v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k];
                        v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP;
-                       v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(
-                                       &v->dummy_vars.dml32_CalculatePrefetchSchedule,
-                                       v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
+                       v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor,
                                        &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k],
                                        mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater,
                                        mode_lib->vba.DPPCLKDelaySCL,
@@ -1167,7 +1163,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency;
 
                dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
-                       &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
                        mode_lib->vba.USRRetrainingRequiredFinal,
                        mode_lib->vba.UsesMALLForPStateChange,
                        mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
@@ -1952,7 +1947,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
        }
 
        dml32_CalculateSwathAndDETConfiguration(
-                       &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
                        mode_lib->vba.DETSizeOverride,
                        mode_lib->vba.UsesMALLForPStateChange,
                        mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2549,7 +2543,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                        }
 
                        dml32_CalculateSwathAndDETConfiguration(
-                                       &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration,
                                        mode_lib->vba.DETSizeOverride,
                                        mode_lib->vba.UsesMALLForPStateChange,
                                        mode_lib->vba.ConfigReturnBufferSizeInKByte,
@@ -2749,7 +2742,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
                        {
                                dml32_CalculateVMRowAndSwath(
-                                               &v->dummy_vars.dml32_CalculateVMRowAndSwath,
                                                mode_lib->vba.NumberOfActiveSurfaces,
                                                v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters,
                                                mode_lib->vba.SurfaceSizeInMALL,
@@ -3266,7 +3258,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
                                        mode_lib->vba.NoTimeForPrefetch[i][j][k] =
                                                dml32_CalculatePrefetchSchedule(
-                                                       &v->dummy_vars.dml32_CalculatePrefetchSchedule,
                                                        v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor,
                                                        &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe,
                                                        mode_lib->vba.DSCDelayPerState[i][k],
@@ -3566,7 +3557,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 
                        {
                                dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
-                                               &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport,
                                                mode_lib->vba.USRRetrainingRequiredFinal,
                                                mode_lib->vba.UsesMALLForPStateChange,
                                                mode_lib->vba.PrefetchModePerState[i][j],
index 07f8f3b8626b2a6b7c67ca04145600d3e497c252..05fc14a47fba91b86cc5579c823da689bc719a59 100644 (file)
@@ -391,7 +391,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes(
 } // CalculateBytePerPixelAndBlockSizes
 
 void dml32_CalculateSwathAndDETConfiguration(
-               struct dml32_CalculateSwathAndDETConfiguration *st_vars,
                unsigned int DETSizeOverride[],
                enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
                unsigned int ConfigReturnBufferSizeInKByte,
@@ -456,10 +455,18 @@ void dml32_CalculateSwathAndDETConfiguration(
                bool ViewportSizeSupportPerSurface[],
                bool *ViewportSizeSupport)
 {
+       unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
+       unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
+       unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
+       unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
+       unsigned int RoundedUpSwathSizeBytesY;
+       unsigned int RoundedUpSwathSizeBytesC;
+       double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
+       double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
        unsigned int k;
-
-       st_vars->TotalActiveDPP = 0;
-       st_vars->NoChromaSurfaces = true;
+       unsigned int TotalActiveDPP = 0;
+       bool NoChromaSurfaces = true;
+       unsigned int DETBufferSizeInKByteForSwathCalculation;
 
 #ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
@@ -494,43 +501,43 @@ void dml32_CalculateSwathAndDETConfiguration(
                        DPPPerSurface,
 
                        /* Output */
-                       st_vars->SwathWidthdoubleDPP,
-                       st_vars->SwathWidthdoubleDPPChroma,
+                       SwathWidthdoubleDPP,
+                       SwathWidthdoubleDPPChroma,
                        SwathWidth,
                        SwathWidthChroma,
-                       st_vars->MaximumSwathHeightY,
-                       st_vars->MaximumSwathHeightC,
+                       MaximumSwathHeightY,
+                       MaximumSwathHeightC,
                        swath_width_luma_ub,
                        swath_width_chroma_ub);
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-               st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k];
-               st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k];
+               RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
+               RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
                dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
                dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
-               dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]);
+               dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
                dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
-                               st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+                               RoundedUpMaxSwathSizeBytesY[k]);
                dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
                dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
-               dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]);
+               dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
                dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
-                               st_vars->RoundedUpMaxSwathSizeBytesC[k]);
+                               RoundedUpMaxSwathSizeBytesC[k]);
 #endif
 
                if (SourcePixelFormat[k] == dm_420_10) {
-                       st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256);
-                       st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256);
+                       RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
+                       RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
                }
        }
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-               st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+               TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
                if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
                                SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
-                       st_vars->NoChromaSurfaces = false;
+                       NoChromaSurfaces = false;
                }
        }
 
@@ -540,10 +547,10 @@ void dml32_CalculateSwathAndDETConfiguration(
        // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
        // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
        // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
-       *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512);
+       *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
 
        if (*CompBufReservedSpaceNeedAdjustment == 1) {
-               *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512;
+               *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
        }
 
        #ifdef __DML_VBA_DEBUG__
@@ -551,7 +558,7 @@ void dml32_CalculateSwathAndDETConfiguration(
                dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
        #endif
 
-       *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
+       *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
 
        dml32_CalculateDETBufferSize(DETSizeOverride,
                        UseMALLForPStateChange,
@@ -566,8 +573,8 @@ void dml32_CalculateSwathAndDETConfiguration(
                        SourcePixelFormat,
                        ReadBandwidthLuma,
                        ReadBandwidthChroma,
-                       st_vars->RoundedUpMaxSwathSizeBytesY,
-                       st_vars->RoundedUpMaxSwathSizeBytesC,
+                       RoundedUpMaxSwathSizeBytesY,
+                       RoundedUpMaxSwathSizeBytesC,
                        DPPPerSurface,
 
                        /* Output */
@@ -575,7 +582,7 @@ void dml32_CalculateSwathAndDETConfiguration(
                        CompressedBufferSizeInkByte);
 
 #ifdef __DML_VBA_DEBUG__
-       dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP);
+       dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
        dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
        dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
        dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
@@ -586,42 +593,42 @@ void dml32_CalculateSwathAndDETConfiguration(
        *ViewportSizeSupport = true;
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
 
-               st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
+               DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
                                dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
-                               st_vars->DETBufferSizeInKByteForSwathCalculation);
+                               DETBufferSizeInKByteForSwathCalculation);
 #endif
 
-               if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
-                               st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
-                       SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
-                       SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
-                       st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
-                       st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
-               } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
-                               st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <=
-                               st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
-                       SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
-                       SwathHeightC[k] = st_vars->MaximumSwathHeightC[k];
-                       st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
-                       st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k];
-               } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] &&
-                               st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <=
-                               st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
-                       SwathHeightY[k] = st_vars->MaximumSwathHeightY[k];
-                       SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
-                       st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k];
-                       st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+               if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
+                               DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+                       SwathHeightY[k] = MaximumSwathHeightY[k];
+                       SwathHeightC[k] = MaximumSwathHeightC[k];
+                       RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+                       RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+               } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+                               RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
+                               DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+                       SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+                       SwathHeightC[k] = MaximumSwathHeightC[k];
+                       RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+                       RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
+               } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
+                               RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
+                               DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+                       SwathHeightY[k] = MaximumSwathHeightY[k];
+                       SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+                       RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
+                       RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
                } else {
-                       SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2;
-                       SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2;
-                       st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2;
-                       st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2;
+                       SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+                       SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+                       RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
+                       RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
                }
 
-               if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 >
-                               st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
+               if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
+                               DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
                                || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
                                                SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
                        *ViewportSizeSupport = false;
@@ -636,7 +643,7 @@ void dml32_CalculateSwathAndDETConfiguration(
 #endif
                        DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
                        DETBufferSizeC[k] = 0;
-               } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) {
+               } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
 #endif
@@ -654,11 +661,11 @@ void dml32_CalculateSwathAndDETConfiguration(
                dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
                dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
                dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
-                               k, st_vars->RoundedUpMaxSwathSizeBytesY[k]);
+                               k, RoundedUpMaxSwathSizeBytesY[k]);
                dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
-                               k, st_vars->RoundedUpMaxSwathSizeBytesC[k]);
-               dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY);
-               dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC);
+                               k, RoundedUpMaxSwathSizeBytesC[k]);
+               dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
+               dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
                dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
                dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
                dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
@@ -1867,7 +1874,6 @@ void dml32_CalculateSurfaceSizeInMall(
 } // CalculateSurfaceSizeInMall
 
 void dml32_CalculateVMRowAndSwath(
-               struct dml32_CalculateVMRowAndSwath *st_vars,
                unsigned int NumberOfActiveSurfaces,
                DmlPipe myPipe[],
                unsigned int SurfaceSizeInMALL[],
@@ -1933,6 +1939,21 @@ void dml32_CalculateVMRowAndSwath(
                unsigned int BIGK_FRAGMENT_SIZE[])
 {
        unsigned int k;
+       unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+       unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+       unsigned int PDEAndMetaPTEBytesFrameY;
+       unsigned int PDEAndMetaPTEBytesFrameC;
+       unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+       unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+       unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+       unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+       unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+       unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+       unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+       unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+       unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+       unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+       bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
                if (HostVMEnable == true) {
@@ -1954,15 +1975,15 @@ void dml32_CalculateVMRowAndSwath(
                                myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
                        if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
                                        !IsVertical(myPipe[k].SourceRotation)) {
-                               st_vars->PTEBufferSizeInRequestsForLuma[k] =
+                               PTEBufferSizeInRequestsForLuma[k] =
                                                (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
-                               st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k];
+                               PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
                        } else {
-                               st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
-                               st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
+                               PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
+                               PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
                        }
 
-                       st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
+                       PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
                                        myPipe[k].ViewportStationary,
                                        myPipe[k].DCCEnable,
                                        myPipe[k].DPPPerSurface,
@@ -1982,21 +2003,21 @@ void dml32_CalculateVMRowAndSwath(
                                        GPUVMMaxPageTableLevels,
                                        GPUVMMinPageSizeKBytes[k],
                                        HostVMMinPageSize,
-                                       st_vars->PTEBufferSizeInRequestsForChroma[k],
+                                       PTEBufferSizeInRequestsForChroma[k],
                                        myPipe[k].PitchC,
                                        myPipe[k].DCCMetaPitchC,
                                        myPipe[k].BlockWidthC,
                                        myPipe[k].BlockHeightC,
 
                                        /* Output */
-                                       &st_vars->MetaRowByteC[k],
-                                       &st_vars->PixelPTEBytesPerRowC[k],
+                                       &MetaRowByteC[k],
+                                       &PixelPTEBytesPerRowC[k],
                                        &dpte_row_width_chroma_ub[k],
                                        &dpte_row_height_chroma[k],
                                        &dpte_row_height_linear_chroma[k],
-                                       &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k],
-                                       &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k],
-                                       &st_vars->dpte_row_height_chroma_one_row_per_frame[k],
+                                       &PixelPTEBytesPerRowC_one_row_per_frame[k],
+                                       &dpte_row_width_chroma_ub_one_row_per_frame[k],
+                                       &dpte_row_height_chroma_one_row_per_frame[k],
                                        &meta_req_width_chroma[k],
                                        &meta_req_height_chroma[k],
                                        &meta_row_width_chroma[k],
@@ -2024,19 +2045,19 @@ void dml32_CalculateVMRowAndSwath(
                                        &VInitPreFillC[k],
                                        &MaxNumSwathC[k]);
                } else {
-                       st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
-                       st_vars->PTEBufferSizeInRequestsForChroma[k] = 0;
-                       st_vars->PixelPTEBytesPerRowC[k] = 0;
-                       st_vars->PDEAndMetaPTEBytesFrameC = 0;
-                       st_vars->MetaRowByteC[k] = 0;
+                       PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
+                       PTEBufferSizeInRequestsForChroma[k] = 0;
+                       PixelPTEBytesPerRowC[k] = 0;
+                       PDEAndMetaPTEBytesFrameC = 0;
+                       MetaRowByteC[k] = 0;
                        MaxNumSwathC[k] = 0;
                        PrefetchSourceLinesC[k] = 0;
-                       st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0;
-                       st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
-                       st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+                       dpte_row_height_chroma_one_row_per_frame[k] = 0;
+                       dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+                       PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
                }
 
-               st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
+               PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
                                myPipe[k].ViewportStationary,
                                myPipe[k].DCCEnable,
                                myPipe[k].DPPPerSurface,
@@ -2056,21 +2077,21 @@ void dml32_CalculateVMRowAndSwath(
                                GPUVMMaxPageTableLevels,
                                GPUVMMinPageSizeKBytes[k],
                                HostVMMinPageSize,
-                               st_vars->PTEBufferSizeInRequestsForLuma[k],
+                               PTEBufferSizeInRequestsForLuma[k],
                                myPipe[k].PitchY,
                                myPipe[k].DCCMetaPitchY,
                                myPipe[k].BlockWidthY,
                                myPipe[k].BlockHeightY,
 
                                /* Output */
-                               &st_vars->MetaRowByteY[k],
-                               &st_vars->PixelPTEBytesPerRowY[k],
+                               &MetaRowByteY[k],
+                               &PixelPTEBytesPerRowY[k],
                                &dpte_row_width_luma_ub[k],
                                &dpte_row_height_luma[k],
                                &dpte_row_height_linear_luma[k],
-                               &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k],
-                               &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k],
-                               &st_vars->dpte_row_height_luma_one_row_per_frame[k],
+                               &PixelPTEBytesPerRowY_one_row_per_frame[k],
+                               &dpte_row_width_luma_ub_one_row_per_frame[k],
+                               &dpte_row_height_luma_one_row_per_frame[k],
                                &meta_req_width[k],
                                &meta_req_height[k],
                                &meta_row_width[k],
@@ -2098,19 +2119,19 @@ void dml32_CalculateVMRowAndSwath(
                                &VInitPreFillY[k],
                                &MaxNumSwathY[k]);
 
-               PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC;
-               MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k];
+               PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
+               MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
 
-               if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] &&
-                               st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) {
+               if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
+                               PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
                        PTEBufferSizeNotExceeded[k] = true;
                } else {
                        PTEBufferSizeNotExceeded[k] = false;
                }
 
-               st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
-                       st_vars->PTEBufferSizeInRequestsForLuma[k] &&
-                       st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]);
+               one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
+                       PTEBufferSizeInRequestsForLuma[k] &&
+                       PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
        }
 
        dml32_CalculateMALLUseForStaticScreen(
@@ -2118,7 +2139,7 @@ void dml32_CalculateVMRowAndSwath(
                        MALLAllocatedForDCN,
                        UseMALLForStaticScreen,   // mode
                        SurfaceSizeInMALL,
-                       st_vars->one_row_per_frame_fits_in_buffer,
+                       one_row_per_frame_fits_in_buffer,
                        /* Output */
                        UsesMALLForStaticScreen); // boolen
 
@@ -2144,13 +2165,13 @@ void dml32_CalculateVMRowAndSwath(
                                !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
 
                if (use_one_row_for_frame[k]) {
-                       dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k];
-                       dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k];
-                       st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k];
-                       dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k];
-                       dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k];
-                       st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k];
-                       PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k];
+                       dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
+                       dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
+                       PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
+                       dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
+                       dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
+                       PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
+                       PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
                }
 
                if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
@@ -2158,7 +2179,7 @@ void dml32_CalculateVMRowAndSwath(
                else
                        DCCMetaBufferSizeNotExceeded[k] = false;
 
-               PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k];
+               PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
                if (use_one_row_for_frame[k])
                        PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
 
@@ -2169,11 +2190,11 @@ void dml32_CalculateVMRowAndSwath(
                                myPipe[k].VRatioChroma,
                                myPipe[k].DCCEnable,
                                myPipe[k].HTotal / myPipe[k].PixelClock,
-                               st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k],
+                               MetaRowByteY[k], MetaRowByteC[k],
                                meta_row_height[k],
                                meta_row_height_chroma[k],
-                               st_vars->PixelPTEBytesPerRowY[k],
-                               st_vars->PixelPTEBytesPerRowC[k],
+                               PixelPTEBytesPerRowY[k],
+                               PixelPTEBytesPerRowC[k],
                                dpte_row_height_luma[k],
                                dpte_row_height_chroma[k],
 
@@ -2189,12 +2210,12 @@ void dml32_CalculateVMRowAndSwath(
                dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
                dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
                                __func__, k, dpte_row_width_luma_ub[k]);
-               dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowY[k]);
+               dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
                dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
                                __func__, k, dpte_row_height_chroma[k]);
                dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
                                __func__, k, dpte_row_width_chroma_ub[k]);
-               dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, st_vars->PixelPTEBytesPerRowC[k]);
+               dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
                dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
                dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
                                __func__, k, PTEBufferSizeNotExceeded[k]);
@@ -3342,7 +3363,6 @@ double dml32_CalculateExtraLatency(
 } // CalculateExtraLatency
 
 bool dml32_CalculatePrefetchSchedule(
-               struct dml32_CalculatePrefetchSchedule *st_vars,
                double HostVMInefficiencyFactor,
                DmlPipe *myPipe,
                unsigned int DSCDelay,
@@ -3406,18 +3426,45 @@ bool dml32_CalculatePrefetchSchedule(
                double   *VReadyOffsetPix)
 {
        bool MyError = false;
-
-       st_vars->TimeForFetchingMetaPTE = 0;
-       st_vars->TimeForFetchingRowInVBlank = 0;
-       st_vars->LinesToRequestPrefetchPixelData = 0;
-       st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__;
-       st_vars->Tsw_est1 = 0;
-       st_vars->Tsw_est3 = 0;
+       unsigned int DPPCycles, DISPCLKCycles;
+       double DSTTotalPixelsAfterScaler;
+       double LineTime;
+       double dst_y_prefetch_equ;
+       double prefetch_bw_oto;
+       double Tvm_oto;
+       double Tr0_oto;
+       double Tvm_oto_lines;
+       double Tr0_oto_lines;
+       double dst_y_prefetch_oto;
+       double TimeForFetchingMetaPTE = 0;
+       double TimeForFetchingRowInVBlank = 0;
+       double LinesToRequestPrefetchPixelData = 0;
+       unsigned int HostVMDynamicLevelsTrips;
+       double  trip_to_mem;
+       double  Tvm_trips;
+       double  Tr0_trips;
+       double  Tvm_trips_rounded;
+       double  Tr0_trips_rounded;
+       double  Lsw_oto;
+       double  Tpre_rounded;
+       double  prefetch_bw_equ;
+       double  Tvm_equ;
+       double  Tr0_equ;
+       double  Tdmbf;
+       double  Tdmec;
+       double  Tdmsks;
+       double  prefetch_sw_bytes;
+       double  bytes_pp;
+       double  dep_bytes;
+       unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
+       double  min_Lsw;
+       double  Tsw_est1 = 0;
+       double  Tsw_est3 = 0;
 
        if (GPUVMEnable == true && HostVMEnable == true)
-               st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
+               HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
        else
-               st_vars->HostVMDynamicLevelsTrips = 0;
+               HostVMDynamicLevelsTrips = 0;
 #ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
        dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
@@ -3440,19 +3487,19 @@ bool dml32_CalculatePrefetchSchedule(
                        TSetup,
 
                        /* output */
-                       &st_vars->Tdmbf,
-                       &st_vars->Tdmec,
-                       &st_vars->Tdmsks,
+                       &Tdmbf,
+                       &Tdmec,
+                       &Tdmsks,
                        VUpdateOffsetPix,
                        VUpdateWidthPix,
                        VReadyOffsetPix);
 
-       st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock;
-       st_vars->trip_to_mem = UrgentLatency;
-       st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+       LineTime = myPipe->HTotal / myPipe->PixelClock;
+       trip_to_mem = UrgentLatency;
+       Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
 
        if (DynamicMetadataVMEnabled == true)
-               *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem;
+               *Tdmdl = TWait + Tvm_trips + trip_to_mem;
        else
                *Tdmdl = TWait + UrgentExtraLatency;
 
@@ -3462,15 +3509,15 @@ bool dml32_CalculatePrefetchSchedule(
 #endif
 
        if (DynamicMetadataEnable == true) {
-               if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) {
+               if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
                        *NotEnoughTimeForDynamicMetadata = true;
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
                        dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
-                                       __func__, st_vars->Tdmbf);
-                       dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+                                       __func__, Tdmbf);
+                       dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
                        dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
-                                       __func__, st_vars->Tdmsks);
+                                       __func__, Tdmsks);
                        dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
                                        __func__, *Tdmdl);
 #endif
@@ -3482,21 +3529,21 @@ bool dml32_CalculatePrefetchSchedule(
        }
 
        *Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
-                       GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0);
+                       GPUVMEnable == true ? TWait + Tvm_trips : 0);
 
        if (myPipe->ScalerEnabled)
-               st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
+               DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
        else
-               st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
+               DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
 
-       st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
+       DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
 
-       st_vars->DISPCLKCycles = DISPCLKDelaySubtotal;
+       DISPCLKCycles = DISPCLKDelaySubtotal;
 
        if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
                return true;
 
-       *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles *
+       *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
                        myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
 
        *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
@@ -3506,10 +3553,10 @@ bool dml32_CalculatePrefetchSchedule(
                        + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
 
 #ifdef __DML_VBA_DEBUG__
-       dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles);
+       dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
        dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
        dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
-       dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles);
+       dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
        dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
        dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
        dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
@@ -3522,9 +3569,9 @@ bool dml32_CalculatePrefetchSchedule(
        else
                *DSTYAfterScaler = 0;
 
-       st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
-       *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
-       *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
+       DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
+       *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
+       *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
 #ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
        dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
@@ -3532,132 +3579,132 @@ bool dml32_CalculatePrefetchSchedule(
 
        MyError = false;
 
-       st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1);
+       Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
 
        if (GPUVMEnable == true) {
-               st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
-               st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+               Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
+               Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
                if (GPUVMPageTableLevels >= 3) {
-                       *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem *
-                                       (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1);
+                       *Tno_bw = UrgentExtraLatency + trip_to_mem *
+                                       (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
                } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
-                       st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) /
-                                       4.0 * st_vars->LineTime; // VBA_ERROR
+                       Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
+                                       4.0 * LineTime; // VBA_ERROR
                        *Tno_bw = UrgentExtraLatency;
                } else {
                        *Tno_bw = 0;
                }
        } else if (myPipe->DCCEnable == true) {
-               st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
-               st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime;
+               Tvm_trips_rounded = LineTime / 4.0;
+               Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
                *Tno_bw = 0;
        } else {
-               st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0;
-               st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0;
+               Tvm_trips_rounded = LineTime / 4.0;
+               Tr0_trips_rounded = LineTime / 2.0;
                *Tno_bw = 0;
        }
-       st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0);
-       st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0);
+       Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
+       Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
 
        if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
                        || myPipe->SourcePixelFormat == dm_420_12) {
-               st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
+               bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
        } else {
-               st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
+               bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
        }
 
-       st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
+       prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
                        + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
-       st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
-                       st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime));
+       prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
+                       prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
 
-       st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre;
-       st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0);
-       st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0;
+       min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
+       min_Lsw = dml_max(min_Lsw, 1.0);
+       Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
 
        if (GPUVMEnable == true) {
-               st_vars->Tvm_oto = dml_max3(
-                               st_vars->Tvm_trips,
-                               *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto,
-                               st_vars->LineTime / 4.0);
+               Tvm_oto = dml_max3(
+                               Tvm_trips,
+                               *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
+                               LineTime / 4.0);
        } else
-               st_vars->Tvm_oto = st_vars->LineTime / 4.0;
+               Tvm_oto = LineTime / 4.0;
 
        if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
-               st_vars->Tr0_oto = dml_max4(
-                               st_vars->Tr0_trips,
-                               (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto,
-                               (st_vars->LineTime - st_vars->Tvm_oto)/2.0,
-                               st_vars->LineTime / 4.0);
+               Tr0_oto = dml_max4(
+                               Tr0_trips,
+                               (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
+                               (LineTime - Tvm_oto)/2.0,
+                               LineTime / 4.0);
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
-                               (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto);
-               dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips);
-               dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto);
-               dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4);
+                               (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
+               dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
+               dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
+               dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
 #endif
        } else
-               st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0;
+               Tr0_oto = (LineTime - Tvm_oto) / 2.0;
 
-       st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0;
-       st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0;
-       st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto;
+       Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
+       Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
+       dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
 
-       st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime -
+       dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
                        (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
 
 #ifdef __DML_VBA_DEBUG__
        dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
-       dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw);
+       dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
        dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
        dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
-       dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem);
+       dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
        dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
        dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
        dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
        dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
        dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
        dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
-       dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes);
-       dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp);
+       dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
+       dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
        dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
        dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
        dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
        dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
-       dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips);
-       dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips);
-       dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto);
-       dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto);
-       dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto);
-       dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines);
-       dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines);
-       dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto);
-       dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto);
-       dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ);
+       dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
+       dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
+       dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
+       dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
+       dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
+       dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
+       dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
+       dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
+       dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
+       dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
 #endif
 
-       st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0;
-       st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime;
+       dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
+       Tpre_rounded = dst_y_prefetch_equ * LineTime;
 #ifdef __DML_VBA_DEBUG__
-       dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ);
-       dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime);
+       dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
+       dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
        dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
        dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
-                       __func__, VStartup * st_vars->LineTime);
+                       __func__, VStartup * LineTime);
        dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
        dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
-       dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf);
-       dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec);
+       dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
+       dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
        dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
        dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
        dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
                        __func__, *DSTYAfterScaler);
 #endif
-       st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
+       dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
                        MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
 
-       if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes)
-               st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes;
+       if (prefetch_sw_bytes < dep_bytes)
+               prefetch_sw_bytes = 2 * dep_bytes;
 
        *PrefetchBandwidth = 0;
        *DestinationLinesToRequestVMInVBlank = 0;
@@ -3665,61 +3712,61 @@ bool dml32_CalculatePrefetchSchedule(
        *VRatioPrefetchY = 0;
        *VRatioPrefetchC = 0;
        *RequiredPrefetchPixDataBWLuma = 0;
-       if (st_vars->dst_y_prefetch_equ > 1) {
+       if (dst_y_prefetch_equ > 1) {
                double PrefetchBandwidth1;
                double PrefetchBandwidth2;
                double PrefetchBandwidth3;
                double PrefetchBandwidth4;
 
-               if (st_vars->Tpre_rounded - *Tno_bw > 0) {
+               if (Tpre_rounded - *Tno_bw > 0) {
                        PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
                                        + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
-                                       + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw);
-                       st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1;
+                                       + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
+                       Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
                } else
                        PrefetchBandwidth1 = 0;
 
-               if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw)
-                               && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) {
+               if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
+                               && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
                        PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
                                        + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
-                                       / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw);
+                                       / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
                }
 
-               if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0)
-                       PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) /
-                       (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded);
+               if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
+                       PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
+                       (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
                else
                        PrefetchBandwidth2 = 0;
 
-               if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) {
+               if (Tpre_rounded - Tvm_trips_rounded > 0) {
                        PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
-                                       + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded);
-                       st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3;
+                                       + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
+                       Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
                } else
                        PrefetchBandwidth3 = 0;
 
 
                if (VStartup == MaxVStartup &&
-                               (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 *
-                               st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) {
+                               (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
+                               LineTime - Tvm_trips_rounded > 0) {
                        PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
-                                       / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded);
+                                       / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
                }
 
-               if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) {
-                       PrefetchBandwidth4 = st_vars->prefetch_sw_bytes /
-                                       (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded);
+               if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
+                       PrefetchBandwidth4 = prefetch_sw_bytes /
+                                       (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
                } else {
                        PrefetchBandwidth4 = 0;
                }
 
 #ifdef __DML_VBA_DEBUG__
-               dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded);
+               dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
                dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
-               dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded);
-               dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1);
-               dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3);
+               dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
+               dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
+               dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
                dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
                dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
                dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
@@ -3732,9 +3779,9 @@ bool dml32_CalculatePrefetchSchedule(
 
                        if (PrefetchBandwidth1 > 0) {
                                if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
-                                               >= st_vars->Tvm_trips_rounded
+                                               >= Tvm_trips_rounded
                                                && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
-                                                               / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) {
+                                                               / PrefetchBandwidth1 >= Tr0_trips_rounded) {
                                        Case1OK = true;
                                } else {
                                        Case1OK = false;
@@ -3745,9 +3792,9 @@ bool dml32_CalculatePrefetchSchedule(
 
                        if (PrefetchBandwidth2 > 0) {
                                if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
-                                               >= st_vars->Tvm_trips_rounded
+                                               >= Tvm_trips_rounded
                                                && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
-                                               / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) {
+                                               / PrefetchBandwidth2 < Tr0_trips_rounded) {
                                        Case2OK = true;
                                } else {
                                        Case2OK = false;
@@ -3758,9 +3805,9 @@ bool dml32_CalculatePrefetchSchedule(
 
                        if (PrefetchBandwidth3 > 0) {
                                if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
-                                               st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
+                                               Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
                                                                HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
-                                                               st_vars->Tr0_trips_rounded) {
+                                                               Tr0_trips_rounded) {
                                        Case3OK = true;
                                } else {
                                        Case3OK = false;
@@ -3770,80 +3817,80 @@ bool dml32_CalculatePrefetchSchedule(
                        }
 
                        if (Case1OK)
-                               st_vars->prefetch_bw_equ = PrefetchBandwidth1;
+                               prefetch_bw_equ = PrefetchBandwidth1;
                        else if (Case2OK)
-                               st_vars->prefetch_bw_equ = PrefetchBandwidth2;
+                               prefetch_bw_equ = PrefetchBandwidth2;
                        else if (Case3OK)
-                               st_vars->prefetch_bw_equ = PrefetchBandwidth3;
+                               prefetch_bw_equ = PrefetchBandwidth3;
                        else
-                               st_vars->prefetch_bw_equ = PrefetchBandwidth4;
+                               prefetch_bw_equ = PrefetchBandwidth4;
 
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
                        dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
                        dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
-                       dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ);
+                       dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
 #endif
 
-                       if (st_vars->prefetch_bw_equ > 0) {
+                       if (prefetch_bw_equ > 0) {
                                if (GPUVMEnable == true) {
-                                       st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
-                                                       HostVMInefficiencyFactor / st_vars->prefetch_bw_equ,
-                                                       st_vars->Tvm_trips, st_vars->LineTime / 4);
+                                       Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
+                                                       HostVMInefficiencyFactor / prefetch_bw_equ,
+                                                       Tvm_trips, LineTime / 4);
                                } else {
-                                       st_vars->Tvm_equ = st_vars->LineTime / 4;
+                                       Tvm_equ = LineTime / 4;
                                }
 
                                if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
-                                       st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
-                                                       HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips,
-                                                       (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4);
+                                       Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
+                                                       HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
+                                                       (LineTime - Tvm_equ) / 2, LineTime / 4);
                                } else {
-                                       st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2;
+                                       Tr0_equ = (LineTime - Tvm_equ) / 2;
                                }
                        } else {
-                               st_vars->Tvm_equ = 0;
-                               st_vars->Tr0_equ = 0;
+                               Tvm_equ = 0;
+                               Tr0_equ = 0;
 #ifdef __DML_VBA_DEBUG__
                                dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
 #endif
                        }
                }
 
-               if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) {
-                       *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto;
-                       st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto;
-                       st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto;
-                       *PrefetchBandwidth = st_vars->prefetch_bw_oto;
+               if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
+                       *DestinationLinesForPrefetch = dst_y_prefetch_oto;
+                       TimeForFetchingMetaPTE = Tvm_oto;
+                       TimeForFetchingRowInVBlank = Tr0_oto;
+                       *PrefetchBandwidth = prefetch_bw_oto;
                } else {
-                       *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ;
-                       st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ;
-                       st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ;
-                       *PrefetchBandwidth = st_vars->prefetch_bw_equ;
+                       *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+                       TimeForFetchingMetaPTE = Tvm_equ;
+                       TimeForFetchingRowInVBlank = Tr0_equ;
+                       *PrefetchBandwidth = prefetch_bw_equ;
                }
 
-               *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0;
+               *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
 
                *DestinationLinesToRequestRowInVBlank =
-                               dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0;
+                               dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
 
-               st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
+               LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
                                *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
 
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
                dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
                                __func__, *DestinationLinesToRequestVMInVBlank);
-               dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank);
-               dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+               dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
+               dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
                dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
                                __func__, *DestinationLinesToRequestRowInVBlank);
                dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
-               dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData);
+               dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
 #endif
 
-               if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) {
-                       *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData;
+               if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
+                       *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
                        *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
@@ -3851,12 +3898,12 @@ bool dml32_CalculatePrefetchSchedule(
                        dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
 #endif
                        if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
-                               if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
+                               if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
                                        *VRatioPrefetchY =
                                                        dml_max((double) PrefetchSourceLinesY /
-                                                                       st_vars->LinesToRequestPrefetchPixelData,
+                                                                       LinesToRequestPrefetchPixelData,
                                                                        (double) MaxNumSwathY * SwathHeightY /
-                                                                       (st_vars->LinesToRequestPrefetchPixelData -
+                                                                       (LinesToRequestPrefetchPixelData -
                                                                        (VInitPreFillY - 3.0) / 2.0));
                                        *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
                                } else {
@@ -3870,7 +3917,7 @@ bool dml32_CalculatePrefetchSchedule(
 #endif
                        }
 
-                       *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData;
+                       *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
                        *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
 
 #ifdef __DML_VBA_DEBUG__
@@ -3879,11 +3926,11 @@ bool dml32_CalculatePrefetchSchedule(
                        dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
 #endif
                        if ((SwathHeightC > 4)) {
-                               if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
+                               if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
                                        *VRatioPrefetchC =
                                                dml_max(*VRatioPrefetchC,
                                                        (double) MaxNumSwathC * SwathHeightC /
-                                                       (st_vars->LinesToRequestPrefetchPixelData -
+                                                       (LinesToRequestPrefetchPixelData -
                                                        (VInitPreFillC - 3.0) / 2.0));
                                        *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
                                } else {
@@ -3898,25 +3945,25 @@ bool dml32_CalculatePrefetchSchedule(
                        }
 
                        *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
-                                       / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
-                                       / st_vars->LineTime;
+                                       / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
+                                       / LineTime;
 
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
                        dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
-                       dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+                       dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
                        dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
                                        __func__, *RequiredPrefetchPixDataBWLuma);
 #endif
                        *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
-                                       st_vars->LinesToRequestPrefetchPixelData
+                                       LinesToRequestPrefetchPixelData
                                        * myPipe->BytePerPixelC
-                                       * swath_width_chroma_ub / st_vars->LineTime;
+                                       * swath_width_chroma_ub / LineTime;
                } else {
                        MyError = true;
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
-                                       __func__, st_vars->LinesToRequestPrefetchPixelData);
+                                       __func__, LinesToRequestPrefetchPixelData);
 #endif
                        *VRatioPrefetchY = 0;
                        *VRatioPrefetchC = 0;
@@ -3925,15 +3972,15 @@ bool dml32_CalculatePrefetchSchedule(
                }
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
-                       (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime +
-                       2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE);
-               dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE);
+                       (double)LinesToRequestPrefetchPixelData * LineTime +
+                       2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
+               dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
                dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
-                       (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime);
+                       (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
                dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
-               dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime -
-                       st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
-                       ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup);
+               dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
+                       TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
+                       ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
                dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
                                PixelPTEBytesPerRow);
 #endif
@@ -3941,7 +3988,7 @@ bool dml32_CalculatePrefetchSchedule(
                MyError = true;
 #ifdef __DML_VBA_DEBUG__
                dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
-                               __func__, st_vars->dst_y_prefetch_equ);
+                               __func__, dst_y_prefetch_equ);
 #endif
        }
 
@@ -3957,10 +4004,10 @@ bool dml32_CalculatePrefetchSchedule(
                        dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
                        dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
                                        __func__, *DestinationLinesToRequestVMInVBlank);
-                       dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime);
+                       dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
 #endif
                        prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
-                                       (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime);
+                                       (*DestinationLinesToRequestVMInVBlank * LineTime);
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
 #endif
@@ -3977,7 +4024,7 @@ bool dml32_CalculatePrefetchSchedule(
                        prefetch_row_bw = 0;
                } else if (*DestinationLinesToRequestRowInVBlank > 0) {
                        prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
-                                       (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime);
+                                       (*DestinationLinesToRequestRowInVBlank * LineTime);
 
 #ifdef __DML_VBA_DEBUG__
                        dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
@@ -4000,12 +4047,12 @@ bool dml32_CalculatePrefetchSchedule(
 
        if (MyError) {
                *PrefetchBandwidth = 0;
-               st_vars->TimeForFetchingMetaPTE = 0;
-               st_vars->TimeForFetchingRowInVBlank = 0;
+               TimeForFetchingMetaPTE = 0;
+               TimeForFetchingRowInVBlank = 0;
                *DestinationLinesToRequestVMInVBlank = 0;
                *DestinationLinesToRequestRowInVBlank = 0;
                *DestinationLinesForPrefetch = 0;
-               st_vars->LinesToRequestPrefetchPixelData = 0;
+               LinesToRequestPrefetchPixelData = 0;
                *VRatioPrefetchY = 0;
                *VRatioPrefetchC = 0;
                *RequiredPrefetchPixDataBWLuma = 0;
@@ -4159,7 +4206,6 @@ void dml32_CalculateFlipSchedule(
 } // CalculateFlipSchedule
 
 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
-               struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
                bool USRRetrainingRequiredFinal,
                enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
                unsigned int PrefetchMode,
@@ -4221,15 +4267,37 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
                double ActiveDRAMClockChangeLatencyMargin[])
 {
        unsigned int i, j, k;
-
-       st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0;
-       st_vars->DRAMClockChangeSupportNumber = 0;
-       st_vars->DRAMClockChangeMethod = 0;
-       st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
-       st_vars->MinActiveFCLKChangeMargin = 0.;
-       st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
-       st_vars->TotalPixelBW = 0.0;
-       st_vars->TotalActiveWriteback = 0;
+       unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
+       unsigned int DRAMClockChangeSupportNumber = 0;
+       unsigned int LastSurfaceWithoutMargin;
+       unsigned int DRAMClockChangeMethod = 0;
+       bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
+       double MinActiveFCLKChangeMargin = 0.;
+       double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
+       double ActiveClockChangeLatencyHidingY;
+       double ActiveClockChangeLatencyHidingC;
+       double ActiveClockChangeLatencyHiding;
+    double EffectiveDETBufferSizeY;
+       double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
+       double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
+       double TotalPixelBW = 0.0;
+       bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
+       double     EffectiveLBLatencyHidingY;
+       double     EffectiveLBLatencyHidingC;
+       double     LinesInDETY[DC__NUM_DPP__MAX];
+       double     LinesInDETC[DC__NUM_DPP__MAX];
+       unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
+       unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
+       double     FullDETBufferingTimeY;
+       double     FullDETBufferingTimeC;
+       double     WritebackDRAMClockChangeLatencyMargin;
+       double     WritebackFCLKChangeLatencyMargin;
+       double     WritebackLatencyHiding;
+       bool    SameTimingForFCLKChange;
+
+       unsigned int    TotalActiveWriteback = 0;
+       unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
+       unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
 
        Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
        Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
@@ -4261,13 +4329,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 #endif
 
 
-       st_vars->TotalActiveWriteback = 0;
+       TotalActiveWriteback = 0;
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
                if (WritebackEnable[k] == true)
-                       st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1;
+                       TotalActiveWriteback = TotalActiveWriteback + 1;
        }
 
-       if (st_vars->TotalActiveWriteback <= 1) {
+       if (TotalActiveWriteback <= 1) {
                Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
        } else {
                Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
@@ -4277,7 +4345,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
                Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
                                + mmSOCParameters.USRRetrainingLatency;
 
-       if (st_vars->TotalActiveWriteback <= 1) {
+       if (TotalActiveWriteback <= 1) {
                Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
                                + mmSOCParameters.WritebackLatency;
                Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
@@ -4307,14 +4375,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 #endif
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-               st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
+               TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
                                SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
        }
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
 
-               st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
-               st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
+               LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
+               LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
 
 
 #ifdef __DML_VBA_DEBUG__
@@ -4325,72 +4393,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
                dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
 #endif
 
-               st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
-               st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
-               st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k];
+               EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
+               EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
+               EffectiveDETBufferSizeY = DETBufferSizeY[k];
 
                if (UnboundedRequestEnabled) {
-                       st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY
+                       EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
                                        + CompressedBufferSizeInkByte * 1024
                                                        * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
-                                                       / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW;
+                                                       / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
                }
 
-               st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
-               st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]);
-               st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
+               LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+               LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
+               FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
 
-               st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY
+               ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
                                - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
 
                if (NumberOfActiveSurfaces > 1) {
-                       st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY
+                       ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
                                        - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
                                                        / PixelClock[k] / VRatio[k];
                }
 
                if (BytePerPixelDETC[k] > 0) {
-                       st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
-                       st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]);
-                       st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
+                       LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
+                       LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
+                       FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
                                        / VRatioChroma[k];
-                       st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC
+                       ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
                                        - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
                                                        / PixelClock[k];
                        if (NumberOfActiveSurfaces > 1) {
-                               st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC
+                               ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
                                                - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
                                                                / PixelClock[k] / VRatioChroma[k];
                        }
-                       st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY,
-                                       st_vars->ActiveClockChangeLatencyHidingC);
+                       ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
+                                       ActiveClockChangeLatencyHidingC);
                } else {
-                       st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY;
+                       ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
                }
 
-               ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+               ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
                                - Watermark->DRAMClockChangeWatermark;
-               st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
+               ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
                                - Watermark->FCLKChangeWatermark;
-               st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
+               USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
 
                if (WritebackEnable[k]) {
-                       st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
+                       WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
                                        / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
                                                        / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
                        if (WritebackPixelFormat[k] == dm_444_64)
-                               st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2;
+                               WritebackLatencyHiding = WritebackLatencyHiding / 2;
 
-                       st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding
+                       WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
                                        - Watermark->WritebackDRAMClockChangeWatermark;
 
-                       st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding
+                       WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
                                        - Watermark->WritebackFCLKChangeWatermark;
 
                        ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
-                                       st_vars->WritebackFCLKChangeLatencyMargin);
-                       st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k],
-                                       st_vars->WritebackDRAMClockChangeLatencyMargin);
+                                       WritebackFCLKChangeLatencyMargin);
+                       ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
+                                       WritebackDRAMClockChangeLatencyMargin);
                }
                MaxActiveDRAMClockChangeLatencySupported[k] =
                                (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
@@ -4409,41 +4477,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
                                        HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
                                        VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
                                        (DRRDisplay[i] || DRRDisplay[j]))) {
-                               st_vars->SynchronizedSurfaces[i][j] = true;
+                               SynchronizedSurfaces[i][j] = true;
                        } else {
-                               st_vars->SynchronizedSurfaces[i][j] = false;
+                               SynchronizedSurfaces[i][j] = false;
                        }
                }
        }
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
                if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
-                               (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
-                               st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) {
-                       st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
-                       st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k];
-                       st_vars->SurfaceWithMinActiveFCLKChangeMargin = k;
+                               (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
+                               ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
+                       FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
+                       MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
+                       SurfaceWithMinActiveFCLKChangeMargin = k;
                }
        }
 
-       *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
+       *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
 
-       st_vars->SameTimingForFCLKChange = true;
+       SameTimingForFCLKChange = true;
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
-               if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) {
+               if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
                        if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
-                                       (st_vars->SameTimingForFCLKChange ||
-                                       st_vars->ActiveFCLKChangeLatencyMargin[k] <
-                                       st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
-                               st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k];
+                                       (SameTimingForFCLKChange ||
+                                       ActiveFCLKChangeLatencyMargin[k] <
+                                       SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
+                               SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
                        }
-                       st_vars->SameTimingForFCLKChange = false;
+                       SameTimingForFCLKChange = false;
                }
        }
 
-       if (st_vars->MinActiveFCLKChangeMargin > 0) {
+       if (MinActiveFCLKChangeMargin > 0) {
                *FCLKChangeSupport = dm_fclock_change_vactive;
-       } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
+       } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
                        (PrefetchMode <= 1)) {
                *FCLKChangeSupport = dm_fclock_change_vblank;
        } else {
@@ -4453,7 +4521,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
        *USRRetrainingSupport = true;
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
                if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
-                               (st_vars->USRRetrainingLatencyMargin[k] < 0)) {
+                               (USRRetrainingLatencyMargin[k] < 0)) {
                        *USRRetrainingSupport = false;
                }
        }
@@ -4464,42 +4532,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
                                UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
                                ActiveDRAMClockChangeLatencyMargin[k] < 0) {
                        if (PrefetchMode > 0) {
-                               st_vars->DRAMClockChangeSupportNumber = 2;
-                       } else if (st_vars->DRAMClockChangeSupportNumber == 0) {
-                               st_vars->DRAMClockChangeSupportNumber = 1;
-                               st_vars->LastSurfaceWithoutMargin = k;
-                       } else if (st_vars->DRAMClockChangeSupportNumber == 1 &&
-                                       !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) {
-                               st_vars->DRAMClockChangeSupportNumber = 2;
+                               DRAMClockChangeSupportNumber = 2;
+                       } else if (DRAMClockChangeSupportNumber == 0) {
+                               DRAMClockChangeSupportNumber = 1;
+                               LastSurfaceWithoutMargin = k;
+                       } else if (DRAMClockChangeSupportNumber == 1 &&
+                                       !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
+                               DRAMClockChangeSupportNumber = 2;
                        }
                }
        }
 
        for (k = 0; k < NumberOfActiveSurfaces; ++k) {
                if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
-                       st_vars->DRAMClockChangeMethod = 1;
+                       DRAMClockChangeMethod = 1;
                else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
-                       st_vars->DRAMClockChangeMethod = 2;
+                       DRAMClockChangeMethod = 2;
        }
 
-       if (st_vars->DRAMClockChangeMethod == 0) {
-               if (st_vars->DRAMClockChangeSupportNumber == 0)
+       if (DRAMClockChangeMethod == 0) {
+               if (DRAMClockChangeSupportNumber == 0)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
-               else if (st_vars->DRAMClockChangeSupportNumber == 1)
+               else if (DRAMClockChangeSupportNumber == 1)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
                else
                        *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
-       } else if (st_vars->DRAMClockChangeMethod == 1) {
-               if (st_vars->DRAMClockChangeSupportNumber == 0)
+       } else if (DRAMClockChangeMethod == 1) {
+               if (DRAMClockChangeSupportNumber == 0)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
-               else if (st_vars->DRAMClockChangeSupportNumber == 1)
+               else if (DRAMClockChangeSupportNumber == 1)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
                else
                        *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
        } else {
-               if (st_vars->DRAMClockChangeSupportNumber == 0)
+               if (DRAMClockChangeSupportNumber == 0)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
-               else if (st_vars->DRAMClockChangeSupportNumber == 1)
+               else if (DRAMClockChangeSupportNumber == 1)
                        *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
                else
                        *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
@@ -4513,7 +4581,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 
                dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
                src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
-               src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k];
+               src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
                sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
 
 #ifdef __DML_VBA_DEBUG__
@@ -4521,7 +4589,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DET
 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
-dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]);
+dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
@@ -4532,7 +4600,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l
 
                if (BytePerPixelDETC[k] > 0) {
                        src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
-                       src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k];
+                       src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
                        sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
                        SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
 
index 37a314ce284b24d019f73a5356c06738ae99e55b..d293856ba906b4032b5e461884a37392ae01fa18 100644 (file)
@@ -30,7 +30,6 @@
 #include "os_types.h"
 #include "../dc_features.h"
 #include "../display_mode_structs.h"
-#include "dml/display_mode_vba.h"
 
 unsigned int dml32_dscceComputeDelay(
                unsigned int bpc,
@@ -82,7 +81,6 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
                double *DPPCLKUsingSingleDPP);
 
 void dml32_CalculateSwathAndDETConfiguration(
-               struct dml32_CalculateSwathAndDETConfiguration *st_vars,
                unsigned int DETSizeOverride[],
                enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
                unsigned int ConfigReturnBufferSizeInKByte,
@@ -362,7 +360,6 @@ void dml32_CalculateSurfaceSizeInMall(
                bool *ExceededMALLSize);
 
 void dml32_CalculateVMRowAndSwath(
-               struct dml32_CalculateVMRowAndSwath *st_vars,
                unsigned int NumberOfActiveSurfaces,
                DmlPipe myPipe[],
                unsigned int SurfaceSizeInMALL[],
@@ -715,7 +712,6 @@ double dml32_CalculateExtraLatency(
                unsigned int HostVMMaxNonCachedPageTableLevels);
 
 bool dml32_CalculatePrefetchSchedule(
-               struct dml32_CalculatePrefetchSchedule *st_vars,
                double HostVMInefficiencyFactor,
                DmlPipe *myPipe,
                unsigned int DSCDelay,
@@ -811,7 +807,6 @@ void dml32_CalculateFlipSchedule(
                bool *ImmediateFlipSupportedForPipe);
 
 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
-               struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars,
                bool USRRetrainingRequiredFinal,
                enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
                unsigned int PrefetchMode,
index 84b4b00f29cbdda318d9aff98afcb5ca16cb66ae..c87091683b5dce2d8e7f1322e25473e8d2f3fbb7 100644 (file)
@@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
                                dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
                }
 
+               if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
+                               != dc->bb_overrides.fclk_clock_change_latency_ns
+                               && dc->bb_overrides.fclk_clock_change_latency_ns) {
+                       dcn3_21_soc.fclk_change_latency_us =
+                               dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
+               }
+
                if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
                                != dc->bb_overrides.dummy_clock_change_latency_ns
                                && dc->bb_overrides.dummy_clock_change_latency_ns) {
index 8460aefe7b6d8205364059d32e1770dbb727694e..492aec634b685815a40c8cb304ae45427d98f323 100644 (file)
@@ -182,108 +182,6 @@ void Calculate256BBlockSizes(
                unsigned int *BlockWidth256BytesY,
                unsigned int *BlockWidth256BytesC);
 
-struct dml32_CalculateSwathAndDETConfiguration {
-       unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
-       unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
-       unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
-       unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
-       unsigned int RoundedUpSwathSizeBytesY;
-       unsigned int RoundedUpSwathSizeBytesC;
-       double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
-       double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
-       unsigned int TotalActiveDPP;
-       bool NoChromaSurfaces;
-       unsigned int DETBufferSizeInKByteForSwathCalculation;
-};
-
-struct dml32_CalculateVMRowAndSwath {
-       unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
-       unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
-       unsigned int PDEAndMetaPTEBytesFrameY;
-       unsigned int PDEAndMetaPTEBytesFrameC;
-       unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
-       unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
-       unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
-       unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
-       unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
-       unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
-       unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
-       unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
-       unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
-       unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
-       bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport {
-       unsigned int SurfaceWithMinActiveFCLKChangeMargin;
-       unsigned int DRAMClockChangeSupportNumber;
-       unsigned int LastSurfaceWithoutMargin;
-       unsigned int DRAMClockChangeMethod;
-       bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin;
-       double MinActiveFCLKChangeMargin;
-       double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank;
-       double ActiveClockChangeLatencyHidingY;
-       double ActiveClockChangeLatencyHidingC;
-       double ActiveClockChangeLatencyHiding;
-       double EffectiveDETBufferSizeY;
-       double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
-       double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
-       double TotalPixelBW;
-       bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
-       double EffectiveLBLatencyHidingY;
-       double EffectiveLBLatencyHidingC;
-       double LinesInDETY[DC__NUM_DPP__MAX];
-       double LinesInDETC[DC__NUM_DPP__MAX];
-       unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
-       unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
-       double FullDETBufferingTimeY;
-       double FullDETBufferingTimeC;
-       double WritebackDRAMClockChangeLatencyMargin;
-       double WritebackFCLKChangeLatencyMargin;
-       double WritebackLatencyHiding;
-       bool SameTimingForFCLKChange;
-       unsigned int TotalActiveWriteback;
-       unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
-       unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
-};
-
-struct dml32_CalculatePrefetchSchedule {
-       unsigned int DPPCycles, DISPCLKCycles;
-       double DSTTotalPixelsAfterScaler;
-       double LineTime;
-       double dst_y_prefetch_equ;
-       double prefetch_bw_oto;
-       double Tvm_oto;
-       double Tr0_oto;
-       double Tvm_oto_lines;
-       double Tr0_oto_lines;
-       double dst_y_prefetch_oto;
-       double TimeForFetchingMetaPTE;
-       double TimeForFetchingRowInVBlank;
-       double LinesToRequestPrefetchPixelData;
-       unsigned int HostVMDynamicLevelsTrips;
-       double trip_to_mem;
-       double Tvm_trips;
-       double Tr0_trips;
-       double Tvm_trips_rounded;
-       double Tr0_trips_rounded;
-       double Lsw_oto;
-       double Tpre_rounded;
-       double prefetch_bw_equ;
-       double Tvm_equ;
-       double Tr0_equ;
-       double Tdmbf;
-       double Tdmec;
-       double Tdmsks;
-       double prefetch_sw_bytes;
-       double bytes_pp;
-       double dep_bytes;
-       unsigned int max_vratio_pre;
-       double min_Lsw;
-       double Tsw_est1;
-       double Tsw_est3;
-};
-
 struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
        unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
        double dummy_single_array[2][DC__NUM_DPP__MAX];
@@ -355,10 +253,6 @@ struct dummy_vars {
        struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
        DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
        struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
-       struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
-       struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
-       struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport;
-       struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule;
 };
 
 struct vba_vars_st {
index ab06c7fc74524c0f3dba85aa4afc6cfe0dd2ada4..9f3558c0ef110c4a524058f36116a9fb759a585c 100644 (file)
@@ -244,13 +244,15 @@ enum {
 #define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN))
 
 #define AMDGPU_FAMILY_GC_11_0_0 145
-#define AMDGPU_FAMILY_GC_11_0_2 148
+#define AMDGPU_FAMILY_GC_11_0_1 148
 #define GC_11_0_0_A0 0x1
 #define GC_11_0_2_A0 0x10
+#define GC_11_0_3_A0 0x20
 #define GC_11_UNKNOWN 0xFF
 
 #define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0)
-#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN)
+#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0)
+#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN)
 
 /*
  * ASIC chip ID
index f093b49c5e6e6143cf0d658514d78fd75f7e6cd0..3bf08a60c45c6e79dc893f1bb1b6dcb6f4d0a9ec 100644 (file)
@@ -119,13 +119,15 @@ enum dc_log_type {
        LOG_HDMI_RETIMER_REDRIVER,
        LOG_DSC,
        LOG_SMU_MSG,
+       LOG_DC2RESERVED4,
+       LOG_DC2RESERVED5,
        LOG_DWB,
        LOG_GAMMA_DEBUG,
        LOG_MAX_HW_POINTS,
        LOG_ALL_TF_CHANNELS,
        LOG_SAMPLE_1DLUT,
        LOG_DP2,
-       LOG_SECTION_TOTAL_COUNT
+       LOG_DC2RESERVED12,
 };
 
 #define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \
index da09ba7589f7316e0dec6b00df3d6e10527dab42..0f39ab9dc5b418d32e8d8d259fd5219fe318cdbc 100644 (file)
@@ -613,10 +613,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
         * Note: We should never go above the field rate of the mode timing set.
         */
        infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000);
-
-       /* FreeSync HDR */
-       infopacket->sb[9] = 0;
-       infopacket->sb[10] = 0;
 }
 
 static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
@@ -684,10 +680,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
 
        /* PB16 : Reserved bits 7:1, FixedRate bit 0 */
        infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0;
-
-       //FreeSync HDR
-       infopacket->sb[9] = 0;
-       infopacket->sb[10] = 0;
 }
 
 static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
@@ -772,8 +764,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal,
                /* HB2  = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */
                infopacket->hb2 = 0x09;
 
-               *payload_size = 0x0A;
-
+               *payload_size = 0x09;
        } else if (dc_is_dp_signal(signal)) {
 
                /* HEADER */
@@ -822,9 +813,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal,
                infopacket->hb1 = version;
 
                /* HB2  = [Bits 7:5 = 0] [Bits 4:0 = Length] */
-               *payload_size = 0x10;
-               infopacket->hb2 = *payload_size - 1; //-1 for checksum
+               infopacket->hb2 = 0x10;
 
+               *payload_size = 0x10;
        } else if (dc_is_dp_signal(signal)) {
 
                /* HEADER */
index 76f695a1d0658a2cdf9eb90c0642a9fdff5d6361..ae2d337158f3b0b3d8993245273ba6f050b2ebee 100644 (file)
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 4
+#define PMFW_DRIVER_IF_VERSION 5
 
 typedef struct {
   int32_t value;
@@ -197,6 +197,8 @@ typedef struct {
 
   uint16_t SkinTemp;
   uint16_t DeviceState;
+  uint16_t CurTemp;                     //[centi-Celsius]
+  uint16_t spare2;
 } SmuMetrics_t;
 
 typedef struct {
index c02e5e576728231d0842b14a7cc4716356227eaf..6fe2fe92ebd75d785dbf32b9c3a7434daab88a1d 100644 (file)
@@ -28,7 +28,7 @@
 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
index fa520d79ef67fc698a930285c1a4011fb4765f9f..6db67f082d91758eece57c919e14e705e0148354 100644 (file)
@@ -4283,6 +4283,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
        .dump_pptable = sienna_cichlid_dump_pptable,
        .init_microcode = smu_v11_0_init_microcode,
        .load_microcode = smu_v11_0_load_microcode,
+       .fini_microcode = smu_v11_0_fini_microcode,
        .init_smc_tables = sienna_cichlid_init_smc_tables,
        .fini_smc_tables = smu_v11_0_fini_smc_tables,
        .init_power = smu_v11_0_init_power,
index e8fe84f806d172f98b56b5c411f566c26e742b56..18ee3b5e64c50fe5fa5450aba56749c50c66fe68 100644 (file)
@@ -212,6 +212,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
        if (!adev->scpm_enabled)
                return 0;
 
+       if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7))
+               return 0;
+
        /* override pptable_id from driver parameter */
        if (amdgpu_smu_pptable_id >= 0) {
                pptable_id = amdgpu_smu_pptable_id;
@@ -219,16 +222,10 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
        } else {
                pptable_id = smu->smu_table.boot_values.pp_table_id;
 
-               if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
-                       pptable_id == 3667)
-                       pptable_id = 36671;
-
-               if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) &&
-                       pptable_id == 3688)
-                       pptable_id = 36881;
                /*
                 * Temporary solution for SMU V13.0.0 with SCPM enabled:
                 *   - use 36831 signed pptable when pp_table_id is 3683
+                *   - use 37151 signed pptable when pp_table_id is 3715
                 *   - use 36641 signed pptable when pp_table_id is 3664 or 0
                 * TODO: drop these when the pptable carried in vbios is ready.
                 */
@@ -241,6 +238,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu)
                        case 3683:
                                pptable_id = 36831;
                                break;
+                       case 3715:
+                               pptable_id = 37151;
+                               break;
                        default:
                                dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
                                return -EINVAL;
@@ -478,7 +478,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
 
                /*
                 * Temporary solution for SMU V13.0.0 with SCPM disabled:
-                *   - use 3664 or 3683 on request
+                *   - use 3664, 3683 or 3715 on request
                 *   - use 3664 when pptable_id is 0
                 * TODO: drop these when the pptable carried in vbios is ready.
                 */
@@ -489,6 +489,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu)
                                break;
                        case 3664:
                        case 3683:
+                       case 3715:
                                break;
                        default:
                                dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id);
@@ -2344,8 +2345,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu)
 
        index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG,
                                               SMU_MSG_EnableGfxImu);
-
-       return smu_cmn_send_msg_without_waiting(smu, index, 0);
+       /* Param 1 to tell PMFW to enable GFXOFF feature */
+       return smu_cmn_send_msg_without_waiting(smu, index, 1);
 }
 
 int smu_v13_0_od_edit_dpm_table(struct smu_context *smu,
index 1bbeceeb9e3cbd67f2df9fdb68c160f3ad7b8694..df4a47acd72472353625ea41a3fadb499fa6743a 100644 (file)
@@ -1792,7 +1792,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .dump_pptable = smu_v13_0_0_dump_pptable,
        .init_microcode = smu_v13_0_init_microcode,
        .load_microcode = smu_v13_0_load_microcode,
+       .fini_microcode = smu_v13_0_fini_microcode,
        .init_smc_tables = smu_v13_0_0_init_smc_tables,
+       .fini_smc_tables = smu_v13_0_fini_smc_tables,
        .init_power = smu_v13_0_init_power,
        .fini_power = smu_v13_0_fini_power,
        .check_fw_status = smu_v13_0_check_fw_status,
index 82d3718d83244f2e8c2fbbc4789d0ea73cc28bb9..97e1d55dcaad5149d6c7ef9acd4df42cbbc4a76d 100644 (file)
@@ -71,7 +71,6 @@ static struct cmn2asic_msg_mapping smu_v13_0_4_message_map[SMU_MSG_MAX_COUNT] =
        MSG_MAP(TestMessage,                    PPSMC_MSG_TestMessage,                  1),
        MSG_MAP(GetSmuVersion,                  PPSMC_MSG_GetPmfwVersion,               1),
        MSG_MAP(GetDriverIfVersion,             PPSMC_MSG_GetDriverIfVersion,           1),
-       MSG_MAP(EnableGfxOff,                   PPSMC_MSG_EnableGfxOff,                 1),
        MSG_MAP(AllowGfxOff,                    PPSMC_MSG_AllowGfxOff,                  1),
        MSG_MAP(DisallowGfxOff,                 PPSMC_MSG_DisallowGfxOff,               1),
        MSG_MAP(PowerDownVcn,                   PPSMC_MSG_PowerDownVcn,                 1),
@@ -199,6 +198,9 @@ static int smu_v13_0_4_fini_smc_tables(struct smu_context *smu)
        kfree(smu_table->watermarks_table);
        smu_table->watermarks_table = NULL;
 
+       kfree(smu_table->gpu_metrics_table);
+       smu_table->gpu_metrics_table = NULL;
+
        return 0;
 }
 
@@ -226,18 +228,6 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
        return ret;
 }
 
-static int smu_v13_0_4_post_smu_init(struct smu_context *smu)
-{
-       struct amdgpu_device *adev = smu->adev;
-       int ret = 0;
-
-       /* allow message will be sent after enable message */
-       ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL);
-       if (ret)
-               dev_err(adev->dev, "Failed to Enable GfxOff!\n");
-       return ret;
-}
-
 static ssize_t smu_v13_0_4_get_gpu_metrics(struct smu_context *smu,
                                           void **table)
 {
@@ -1026,7 +1016,6 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
        .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
        .set_driver_table_location = smu_v13_0_set_driver_table_location,
        .gfx_off_control = smu_v13_0_gfx_off_control,
-       .post_init = smu_v13_0_4_post_smu_init,
        .mode2_reset = smu_v13_0_4_mode2_reset,
        .get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq,
        .od_edit_dpm_table = smu_v13_0_od_edit_dpm_table,
index 47360ef5c17589d1ef4ceee33b49d6b0064ca39b..66445964efbd1e5a94c7cfa3d2bbfca7b76e8c89 100644 (file)
@@ -176,6 +176,9 @@ static int smu_v13_0_5_fini_smc_tables(struct smu_context *smu)
        kfree(smu_table->watermarks_table);
        smu_table->watermarks_table = NULL;
 
+       kfree(smu_table->gpu_metrics_table);
+       smu_table->gpu_metrics_table = NULL;
+
        return 0;
 }
 
index 9dd56e73218be8b37c613ba7610cce412d5a1a18..1016d1c216d8c7e5576f95ce97268a777513959b 100644 (file)
@@ -1567,6 +1567,16 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
        return ret;
 }
 
+static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       /* SRIOV does not support SMU mode1 reset, so report false when amdgpu_sriov_vf() holds */
+       if (amdgpu_sriov_vf(adev))
+               return false;
+
+       return true;
+}
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
        .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
        .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1574,7 +1584,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
        .dump_pptable = smu_v13_0_7_dump_pptable,
        .init_microcode = smu_v13_0_init_microcode,
        .load_microcode = smu_v13_0_load_microcode,
+       .fini_microcode = smu_v13_0_fini_microcode,
        .init_smc_tables = smu_v13_0_7_init_smc_tables,
+       .fini_smc_tables = smu_v13_0_fini_smc_tables,
        .init_power = smu_v13_0_init_power,
        .fini_power = smu_v13_0_fini_power,
        .check_fw_status = smu_v13_0_7_check_fw_status,
@@ -1624,6 +1636,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
        .baco_set_state = smu_v13_0_baco_set_state,
        .baco_enter = smu_v13_0_baco_enter,
        .baco_exit = smu_v13_0_baco_exit,
+       .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
+       .mode1_reset = smu_v13_0_mode1_reset,
        .set_mp1_state = smu_v13_0_7_set_mp1_state,
 };