Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f...
author    Dave Airlie <airlied@redhat.com>
Fri, 21 May 2021 05:29:34 +0000 (15:29 +1000)
committer Dave Airlie <airlied@redhat.com>
Fri, 21 May 2021 05:29:40 +0000 (15:29 +1000)
amd-drm-next-5.14-2021-05-19:

amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu

amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes

radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays

UAPI:
- amdgpu: Drop the AMDGPU_GEM_CREATE_SHADOW flag.
  This was always a kernel-internal flag and userspace use of it has always been blocked.
  It's no longer needed, so remove it (a minimal allocation sketch follows this list).
- amdkfd: HMM SVM support
  Overview: https://patchwork.freedesktop.org/series/85562/
  Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip
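
A minimal userspace sketch of the allocation path referenced in the first UAPI item above, assuming a libdrm-style uapi header install and a render node at /dev/dri/renderD128 (both are illustrative, not part of this pull). It passes only kernel-validated flags in domain_flags; AMDGPU_GEM_CREATE_SHADOW was never accepted on this path, which is why dropping the define is not a userspace-visible change.

/* Hedged sketch: allocate a 1 MiB VRAM BO through the amdgpu GEM UAPI. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <libdrm/amdgpu_drm.h>	/* include path assumed from a libdrm install */

int main(void)
{
	union drm_amdgpu_gem_create args;
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* assumed render node */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = 1 << 20;		/* 1 MiB buffer */
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	/* Only flags the kernel accepts; the removed shadow flag was always
	 * rejected for userspace, so nothing changes here. */
	args.in.domain_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args) == 0)
		printf("GEM handle %u\n", args.out.handle);

	close(fd);
	return 0;
}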

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
159 files changed:
MAINTAINERS
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
drivers/gpu/drm/amd/amdgpu/dce_virtual.c
drivers/gpu/drm/amd/amdgpu/df_v3_6.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
drivers/gpu/drm/amd/amdgpu/vega10_ih.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/Kconfig
drivers/gpu/drm/amd/amdkfd/Makefile
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_migrate.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_svm.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/Makefile
drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
drivers/gpu/drm/amd/display/dc/dc_dp_types.h
drivers/gpu/drm/amd/display/dc/dc_dsc.h
drivers/gpu/drm/amd/display/dc/dc_hw_types.h
drivers/gpu/drm/amd/display/dc/dc_link.h
drivers/gpu/drm/amd/display/dc/dc_types.h
drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
drivers/gpu/drm/amd/display/dc/dm_helpers.h
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h
drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
drivers/gpu/drm/amd/display/dc/irq_types.h
drivers/gpu/drm/amd/display/dmub/dmub_srv.h
drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
drivers/gpu/drm/amd/display/include/link_service_types.h
drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
drivers/gpu/drm/amd/include/kgd_pp_interface.h
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/inc/smu13_driver_if_aldebaran.h
drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
drivers/gpu/drm/radeon/radeon_device.c
include/uapi/drm/amdgpu_drm.h
include/uapi/linux/kfd_ioctl.h

index a7a1c8e21a2ea0d6d153fbf58a23fba09817c078..315120c4124d5a46b09e4b230e851e96826dfc0d 100644 (file)
@@ -878,7 +878,7 @@ M:  Harry Wentland <harry.wentland@amd.com>
 M:     Leo Li <sunpeng.li@amd.com>
 L:     amd-gfx@lists.freedesktop.org
 S:     Supported
-T:     git git://people.freedesktop.org/~agd5f/linux
+T:     git https://gitlab.freedesktop.org/agd5f/linux.git
 F:     drivers/gpu/drm/amd/display/
 
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
@@ -954,7 +954,7 @@ AMD POWERPLAY
 M:     Evan Quan <evan.quan@amd.com>
 L:     amd-gfx@lists.freedesktop.org
 S:     Supported
-T:     git git://people.freedesktop.org/~agd5f/linux
+T:     git https://gitlab.freedesktop.org/agd5f/linux.git
 F:     drivers/gpu/drm/amd/pm/powerplay/
 
 AMD SEATTLE DEVICE TREE SUPPORT
index d216b7ecb5d115070b4bbbeb9a83eb139f4730fd..cc36570b0d2f06027bf66eb8ae6687bc0e245fcf 100644 (file)
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
        amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
        amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
-       amdgpu_fw_attestation.o amdgpu_securedisplay.o
+       amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
index 6cabecc740074fd195a7af7b4f1d6c260a8ec751..ad5f508924b8eb7031af1b0f217c687a1b40b42c 100644 (file)
@@ -1075,7 +1075,8 @@ struct amdgpu_device {
 
        atomic_t                        throttling_logging_enabled;
        struct ratelimit_state          throttling_logging_rs;
-       uint32_t                        ras_features;
+       uint32_t                        ras_hw_enabled;
+       uint32_t                        ras_enabled;
 
        bool                            in_pci_err_recovery;
        struct pci_saved_state          *pci_state;
index 2e9b16fb3fcd149b2063b05a9c936e3f0e5a42f9..bf2939b6eb430eb0ce7bce72b556ab829c64e810 100644 (file)
@@ -76,7 +76,7 @@ struct amdgpu_atif {
 /**
  * amdgpu_atif_call - call an ATIF method
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  * @function: the ATIF function to execute
  * @params: ATIF function params
  *
@@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
 /**
  * amdgpu_atif_verify_interface - verify ATIF
  *
- * @handle: acpi handle
  * @atif: amdgpu atif struct
  *
  * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@@ -240,8 +239,7 @@ out:
 /**
  * amdgpu_atif_get_notification_params - determine notify configuration
  *
- * @handle: acpi handle
- * @n: atif notification configuration struct
+ * @atif: acpi handle
  *
  * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
  * to determine if a notifier is used and if so which one
@@ -304,7 +302,7 @@ out:
 /**
  * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  *
  * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
  * to determine the acceptable range of backlight values
@@ -363,7 +361,7 @@ out:
 /**
  * amdgpu_atif_get_sbios_requests - get requested sbios event
  *
- * @handle: acpi handle
+ * @atif: acpi handle
  * @req: atif sbios request struct
  *
  * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
 /**
  * amdgpu_acpi_is_s0ix_supported
  *
+ * @adev: amdgpu_device pointer
+ *
  * returns true if supported, false if not.
  */
 bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
index 5ffb07b02810ef2be2d6f059234d3b91341b5dea..313ee49b9f17dbc568edc42d494f517578d817bd 100644 (file)
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
        struct mm_struct *mm;
        spinlock_t lock;
        char timeline_name[TASK_COMM_LEN];
+       struct svm_range_bo *svm_bo;
 };
 
 struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
                                        int queue_bit);
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-                                                               struct mm_struct *mm);
+                               struct mm_struct *mm,
+                               struct svm_range_bo *svm_bo);
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
        })
 
 /* GPUVM API */
+#define drm_priv_to_vm(drm_priv)                                       \
+       (&((struct amdgpu_fpriv *)                                      \
+               ((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
                                        struct file *filp, u32 pasid,
-                                       void **vm, void **process_info,
+                                       void **process_info,
                                        struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                struct kgd_dev *kgd, uint64_t va, uint64_t size,
-               void *vm, struct kgd_mem **mem,
+               void *drm_priv, struct kgd_mem **mem,
                uint64_t *offset, uint32_t flags);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+               uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
                struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
@@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
                                              struct kfd_vm_fault_info *info);
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
                                      struct dma_buf *dmabuf,
-                                     uint64_t va, void *vm,
+                                     uint64_t va, void *drm_priv,
                                      struct kgd_mem **mem, uint64_t *size,
                                      uint64_t *mmap_offset);
 int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
@@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
index 9ef9f3ddad4822904a9ddeeab017ea0053d613bd..6409d6b1b2dfd45eba60bdfeae719240d46c2d71 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
 #include "sdma0/sdma0_4_2_2_offset.h"
 #include "sdma0/sdma0_4_2_2_sh_mask.h"
 #include "sdma1/sdma1_4_2_2_offset.h"
index 5af46493397662dd7cab015da86d1a895894db8f..1d0dbff87d3f39715aeee7d4cc84d1d0bf7bf69f 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
 
 static const struct dma_fence_ops amdkfd_fence_ops;
 static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
  */
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-                                                      struct mm_struct *mm)
+                               struct mm_struct *mm,
+                               struct svm_range_bo *svm_bo)
 {
        struct amdgpu_amdkfd_fence *fence;
 
@@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
        fence->mm = mm;
        get_task_comm(fence->timeline_name, current);
        spin_lock_init(&fence->lock);
-
+       fence->svm_bo = svm_bo;
        dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
                   context, atomic_inc_return(&fence_seq));
 
@@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
  *  a KFD BO and schedules a job to move the BO.
  *  If fence is already signaled return true.
  *  If fence is not signaled schedule a evict KFD process work item.
+ *
+ *  @f: dma_fence
  */
 static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
 {
@@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
        if (dma_fence_is_signaled(f))
                return true;
 
-       if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
-               return true;
-
+       if (!fence->svm_bo) {
+               if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+                       return true;
+       } else {
+               if (!svm_range_schedule_evict_svm_bo(fence))
+                       return true;
+       }
        return false;
 }
 
 /**
  * amdkfd_fence_release - callback that fence can be freed
  *
- * @fence: fence
+ * @f: dma_fence
  *
  * This function is called when the reference count becomes zero.
  * Drops the mm_struct reference and RCU schedules freeing up the fence.
index b43e68fc13782772f0bf18ecb1a020a38dd0e9b5..ed3014fbb5630b7264f24e8cde8165c8e906dba6 100644 (file)
@@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
 }
 
 /**
- * @get_wave_count: Read device registers to get number of waves in flight for
+ * get_wave_count: Read device registers to get number of waves in flight for
  * a particular queue. The method also returns the VMID associated with the
  * queue.
  *
@@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
 }
 
 /**
- * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
  * shader engine and aggregates the number of waves that are in flight for the
  * process whose pasid is provided as a parameter. The process could have ZERO
  * or more queues running and submitting waves to compute units.
  *
  * @kgd: Handle of device from which to get number of waves in flight
  * @pasid: Identifies the process for which this query call is invoked
- * @wave_cnt: Output parameter updated with number of waves in flight that
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
  * belong to process with given pasid
  * @max_waves_per_cu: Output parameter updated with maximum number of waves
  * possible per Compute Unit
  *
- * @note: It's possible that the device has too many queues (oversubscription)
+ * Note: It's possible that the device has too many queues (oversubscription)
  * in which case a VMID could be remapped to a different PASID. This could lead
  * to an iaccurate wave count. Following is a high-level sequence:
  *    Time T1: vmid = getVmid(); vmid is associated with Pasid P1
index 7d4118c8128a54f131b3190b149ebe9062937f41..dfa025d694f87dbb6ca1e9442a3210a987c1b3aa 100644 (file)
@@ -33,9 +33,6 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
 
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
@@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
                (kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+       kfd_mem_limit.system_mem_used += size;
+}
+
 /* Estimate page table size needed to represent a given memory size
  *
  * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
@@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
        u32 domain = bo->preferred_domains;
        bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
-       if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+       if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
                domain = AMDGPU_GEM_DOMAIN_CPU;
                sg = false;
        }
@@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 
                info->eviction_fence =
                        amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-                                                  current->mm);
+                                                  current->mm,
+                                                  NULL);
                if (!info->eviction_fence) {
                        pr_err("Failed to create eviction fence\n");
                        ret = -ENOMEM;
@@ -1036,15 +1039,19 @@ create_evict_fence_fail:
 
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
                                           struct file *filp, u32 pasid,
-                                          void **vm, void **process_info,
+                                          void **process_info,
                                           struct dma_fence **ef)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct drm_file *drm_priv = filp->private_data;
-       struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
-       struct amdgpu_vm *avm = &drv_priv->vm;
+       struct amdgpu_fpriv *drv_priv;
+       struct amdgpu_vm *avm;
        int ret;
 
+       ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+       if (ret)
+               return ret;
+       avm = &drv_priv->vm;
+
        /* Already a compute VM? */
        if (avm->process_info)
                return -EINVAL;
@@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
        if (ret)
                return ret;
 
-       *vm = (void *)avm;
+       amdgpu_vm_set_task_info(avm);
 
        return 0;
 }
@@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
        }
 }
 
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       struct amdgpu_vm *avm;
 
-       if (WARN_ON(!kgd || !vm))
+       if (WARN_ON(!kgd || !drm_priv))
                return;
 
-       pr_debug("Releasing process vm %p\n", vm);
+       avm = drm_priv_to_vm(drm_priv);
+
+       pr_debug("Releasing process vm %p\n", avm);
 
        /* The original pasid of amdgpu vm has already been
         * released during making a amdgpu vm to a compute vm
@@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
        amdgpu_vm_release_compute(adev, avm);
 }
 
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
 {
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
        struct amdgpu_bo *pd = avm->root.base.bo;
        struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 
@@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                struct kgd_dev *kgd, uint64_t va, uint64_t size,
-               void *vm, struct kgd_mem **mem,
+               void *drm_priv, struct kgd_mem **mem,
                uint64_t *offset, uint32_t flags)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
        enum ttm_bo_type bo_type = ttm_bo_type_device;
        struct sg_table *sg = NULL;
        uint64_t user_addr = 0;
@@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                         domain_string(alloc_domain), ret);
                goto err_bo_create;
        }
+       ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+       if (ret) {
+               pr_debug("Failed to allow vma node access. ret %d\n", ret);
+               goto err_node_allow;
+       }
        bo = gem_to_amdgpu_bo(gobj);
        if (bo_type == ttm_bo_type_sg) {
                bo->tbo.sg = sg;
@@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        bo->kfd_bo = *mem;
        (*mem)->bo = bo;
        if (user_addr)
-               bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+               bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
        (*mem)->va = va;
        (*mem)->domain = domain;
@@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 allocate_init_user_pages_failed:
        remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+       drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
        amdgpu_bo_unref(&bo);
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
@@ -1262,7 +1278,8 @@ err:
 }
 
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+               uint64_t *size)
 {
        struct amdkfd_process_info *process_info = mem->process_info;
        unsigned long bo_size = mem->bo->tbo.base.size;
@@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        }
 
        /* Free the BO*/
+       drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
        drm_gem_object_put(&mem->bo->tbo.base);
        mutex_destroy(&mem->lock);
        kfree(mem);
@@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
        int ret;
        struct amdgpu_bo *bo;
        uint32_t domain;
@@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
        pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
                        mem->va,
                        mem->va + bo_size * (1 + mem->aql_queue),
-                       vm, domain_string(domain));
+                       avm, domain_string(domain));
 
-       ret = reserve_bo_and_vm(mem, vm, &ctx);
+       ret = reserve_bo_and_vm(mem, avm, &ctx);
        if (unlikely(ret))
                goto out;
 
@@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
        }
 
        list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-               if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+               if (entry->bo_va->base.vm == avm && !entry->is_mapped) {
                        pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
                                        entry->va, entry->va + bo_size,
                                        entry);
@@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
                                goto map_bo_to_gpuvm_failed;
                        }
 
-                       ret = vm_update_pds(vm, ctx.sync);
+                       ret = vm_update_pds(avm, ctx.sync);
                        if (ret) {
                                pr_err("Failed to update page directories\n");
                                goto map_bo_to_gpuvm_failed;
@@ -1485,11 +1503,11 @@ out:
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct amdkfd_process_info *process_info =
-               ((struct amdgpu_vm *)vm)->process_info;
+       struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+       struct amdkfd_process_info *process_info = avm->process_info;
        unsigned long bo_size = mem->bo->tbo.base.size;
        struct kfd_bo_va_list *entry;
        struct bo_vm_reservation_context ctx;
@@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 
        mutex_lock(&mem->lock);
 
-       ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+       ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
        if (unlikely(ret))
                goto out;
        /* If no VMs were reserved, it means the BO wasn't actually mapped */
@@ -1506,17 +1524,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
                goto unreserve_out;
        }
 
-       ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+       ret = vm_validate_pt_pd_bos(avm);
        if (unlikely(ret))
                goto unreserve_out;
 
        pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
                mem->va,
                mem->va + bo_size * (1 + mem->aql_queue),
-               vm);
+               avm);
 
        list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-               if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+               if (entry->bo_va->base.vm == avm && entry->is_mapped) {
                        pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
                                        entry->va,
                                        entry->va + bo_size,
@@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
                                      struct dma_buf *dma_buf,
-                                     uint64_t va, void *vm,
+                                     uint64_t va, void *drm_priv,
                                      struct kgd_mem **mem, uint64_t *size,
                                      uint64_t *mmap_offset)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+       struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
        struct drm_gem_object *obj;
        struct amdgpu_bo *bo;
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       int ret;
 
        if (dma_buf->ops != &amdgpu_dmabuf_ops)
                /* Can't handle non-graphics buffers */
@@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
        if (!*mem)
                return -ENOMEM;
 
+       ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+       if (ret) {
+               kfree(mem);
+               return ret;
+       }
+
        if (size)
                *size = amdgpu_bo_size(bo);
 
@@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
         */
        new_fence = amdgpu_amdkfd_fence_create(
                                process_info->eviction_fence->base.context,
-                               process_info->eviction_fence->mm);
+                               process_info->eviction_fence->mm,
+                               NULL);
        if (!new_fence) {
                pr_err("Failed to create eviction fence\n");
                ret = -ENOMEM;
index b5c7669980458480f2ff167fd55521eb18a56852..90136f9dedd6576485eef6f61c2b4195925a69c1 100644 (file)
@@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 }
 
 /**
- * cs_parser_fini() - clean parser states
+ * amdgpu_cs_parser_fini() - clean parser states
  * @parser:    parser structure holding parsing context.
  * @error:     error number
  * @backoff:   indicator to backoff the reservation
@@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 }
 
 /**
- * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
  *
  * @adev: amdgpu device
  * @filp: file private
@@ -1639,7 +1639,7 @@ err_free_fences:
 }
 
 /**
- * amdgpu_cs_find_bo_va - find bo_va for VM address
+ * amdgpu_cs_find_mapping - find bo_va for VM address
  *
  * @parser: command submission parser context
  * @addr: VM address
index 8b2a37bf2adf1c670ece8ad50822a66e45c2d874..2360a9c518ebbce99771b5134c64f9b5a003f678 100644 (file)
@@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
                AMD_IP_BLOCK_TYPE_IH,
        };
 
-       for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+       for (i = 0; i < adev->num_ip_blocks; i++) {
                int j;
                struct amdgpu_ip_block *block;
 
@@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
        int ret = 0;
 
        /*
-        * By default timeout for non compute jobs is 10000.
-        * And there is no timeout enforced on compute jobs.
+        * By default timeout for non compute jobs is 10000
+        * and 60000 for compute jobs.
         * In SR-IOV or passthrough mode, timeout for compute
         * jobs are 60000 by default.
         */
@@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev))
                adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
                                        msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
-       else if (amdgpu_passthrough(adev))
-               adev->compute_timeout =  msecs_to_jiffies(60000);
        else
-               adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+               adev->compute_timeout =  msecs_to_jiffies(60000);
 
        if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
                while ((timeout_setting = strsep(&input, ",")) &&
@@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
        struct amdgpu_device *adev = drm_to_adev(dev);
-       int r;
 
        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
                return 0;
@@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        amdgpu_ras_suspend(adev);
 
-       r = amdgpu_device_ip_suspend_phase1(adev);
+       amdgpu_device_ip_suspend_phase1(adev);
 
        if (!adev->in_s0ix)
                amdgpu_amdkfd_suspend(adev, adev->in_runpm);
@@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        amdgpu_fence_driver_suspend(adev);
 
-       r = amdgpu_device_ip_suspend_phase2(adev);
+       amdgpu_device_ip_suspend_phase2(adev);
        /* evict remaining vram memory
         * This second call to evict vram is to evict the gart page table
         * using the CPU.
@@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
        if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
                return -ENOTSUPP;
 
-       if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+       if (ras && adev->ras_enabled &&
+           adev->nbio.funcs->enable_doorbell_interrupt)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
        return amdgpu_dpm_baco_enter(adev);
@@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
        if (ret)
                return ret;
 
-       if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+       if (ras && adev->ras_enabled &&
+           adev->nbio.funcs->enable_doorbell_interrupt)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
        return 0;
index 6fd20ea2935b0d98ecc84f108de1cbfa8ea80851..1ed9748b9bc77288f6150dcd16f553619aa4bf13 100644 (file)
@@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
  *   for SDMA and Video.
  *
  * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * jobs is 10000. The timeout for compute is 60000.
  */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
                "for passthrough or sriov, 10000 for all jobs."
                " 0: keep default value. negative: infinity timeout), "
                "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
@@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
 
 /**
  * DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
  * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
  */
 MODULE_PARM_DESC(noretry,
@@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
        {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
        {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 
        {0, 0, 0}
 };
@@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
        if (amdgpu_device_has_dc_support(adev)) {
                struct drm_crtc *crtc;
 
-               drm_modeset_lock_all(drm_dev);
-
                drm_for_each_crtc(crtc, drm_dev) {
-                       if (crtc->state->active) {
+                       drm_modeset_lock(&crtc->mutex, NULL);
+                       if (crtc->state->active)
                                ret = -EBUSY;
+                       drm_modeset_unlock(&crtc->mutex);
+                       if (ret < 0)
                                break;
-                       }
                }
 
-               drm_modeset_unlock_all(drm_dev);
-
        } else {
                struct drm_connector *list_connector;
                struct drm_connector_list_iter iter;
index 47ea468596184509b4174db02437993f2ca93b33..30772608eac6c56985db06f567d82003d93ce201 100644 (file)
@@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
  *
  * @ring: ring to init the fence driver on
  * @num_hw_submission: number of entries on the hardware queue
+ * @sched_score: optional score atomic shared with other schedulers
  *
  * Init the fence driver for the requested ring (all asics).
  * Helper function for amdgpu_fence_driver_init().
index c5a9a4fb10d2bde0767b8f76d95d633e5f44709f..5562b5c90c032155b4acfd1d81c04804e581e9b6 100644 (file)
@@ -60,7 +60,7 @@
  */
 
 /**
- * amdgpu_dummy_page_init - init dummy page used by the driver
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
  *
  * @adev: amdgpu_device pointer
  *
@@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_dummy_page_fini - free dummy page used by the driver
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
  *
  * @adev: amdgpu_device pointer
  *
index c39ed9eb0987dfde3c035b7085531c79b44837cf..a129ecc738693fae4fccbce34eb0bdd7c20263bb 100644 (file)
@@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
                        mc->agp_size >> 20, mc->agp_start, mc->agp_end);
 }
 
+/**
+ * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+       return addr << 4 | pasid;
+}
+
 /**
  * amdgpu_gmc_filter_faults - filter VM faults
  *
@@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
                              uint16_t pasid, uint64_t timestamp)
 {
        struct amdgpu_gmc *gmc = &adev->gmc;
-
-       uint64_t stamp, key = addr << 4 | pasid;
+       uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
        struct amdgpu_gmc_fault *fault;
        uint32_t hash;
 
@@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
        while (fault->timestamp >= stamp) {
                uint64_t tmp;
 
-               if (fault->key == key)
+               if (atomic64_read(&fault->key) == key)
                        return true;
 
                tmp = fault->timestamp;
@@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
 
        /* Add the fault to the ring */
        fault = &gmc->fault_ring[gmc->last_fault];
-       fault->key = key;
+       atomic64_set(&fault->key, key);
        fault->timestamp = timestamp;
 
        /* And update the hash */
@@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
        return false;
 }
 
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+                                    uint16_t pasid)
+{
+       struct amdgpu_gmc *gmc = &adev->gmc;
+       uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+       struct amdgpu_gmc_fault *fault;
+       uint32_t hash;
+       uint64_t tmp;
+
+       hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+       fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+       do {
+               if (atomic64_cmpxchg(&fault->key, key, 0) == key)
+                       break;
+
+               tmp = fault->timestamp;
+               fault = &gmc->fault_ring[fault->next];
+       } while (fault->timestamp < tmp);
+}
+
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
        int r;
@@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
                        return r;
        }
 
+       if (adev->hdp.ras_funcs &&
+           adev->hdp.ras_funcs->ras_late_init) {
+               r = adev->hdp.ras_funcs->ras_late_init(adev);
+               if (r)
+                       return r;
+       }
+
        return 0;
 }
 
@@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 
        if (adev->mmhub.ras_funcs &&
            adev->mmhub.ras_funcs->ras_fini)
-               amdgpu_mmhub_ras_fini(adev);
+               adev->mmhub.ras_funcs->ras_fini(adev);
 
        if (adev->gmc.xgmi.ras_funcs &&
            adev->gmc.xgmi.ras_funcs->ras_fini)
                adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+       if (adev->hdp.ras_funcs &&
+           adev->hdp.ras_funcs->ras_fini)
+               adev->hdp.ras_funcs->ras_fini(adev);
 }
 
        /*
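
The amdgpu_gmc.c hunks above turn the fault-filter key into an atomic value packed as addr << 4 | pasid, so amdgpu_gmc_filter_faults_remove() can invalidate an entry with a compare-and-swap and let a later retry fault through. Below is a standalone sketch of that idea using C11 atomics rather than the kernel's atomic64_t; the ring size and the linear scan (the driver walks a hash chain via fault->next) are simplifications for illustration only.

/* Standalone sketch of the fault-filter key handling above: the key packs
 * the page-aligned fault address and 16-bit PASID into one 64-bit value,
 * and removal clears a matching entry with a compare-and-swap, no lock needed. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FAULT_RING_SIZE 8	/* illustrative; the driver sizes this by order */

struct fault_entry {
	_Atomic uint64_t key;
	uint64_t timestamp;
};

static uint64_t fault_key(uint64_t addr, uint16_t pasid)
{
	return addr << 4 | pasid;	/* same packing as amdgpu_gmc_fault_key() */
}

/* Invalidate a previously filtered fault so a retry can be handled again. */
static void fault_filter_remove(struct fault_entry *ring, uint64_t addr,
				uint16_t pasid)
{
	uint64_t key = fault_key(addr, pasid);
	int i;

	for (i = 0; i < FAULT_RING_SIZE; i++) {
		uint64_t expected = key;

		/* Clear the entry only if it still holds exactly this key. */
		if (atomic_compare_exchange_strong(&ring[i].key, &expected, 0))
			break;
	}
}

int main(void)
{
	static struct fault_entry ring[FAULT_RING_SIZE];

	atomic_store(&ring[3].key, fault_key(0x123450ULL, 42));
	fault_filter_remove(ring, 0x123450ULL, 42);
	printf("entry 3 key after remove: %llu\n",
	       (unsigned long long)atomic_load(&ring[3].key));
	return 0;
}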
index 9d11c02a39388eaae891874884905dc7d21dc080..6aa1d52d3aeed13eb41aa8757b3d8e6c7ffb5eba 100644 (file)
@@ -66,9 +66,9 @@ struct firmware;
  * GMC page fault information
  */
 struct amdgpu_gmc_fault {
-       uint64_t        timestamp;
+       uint64_t        timestamp:48;
        uint64_t        next:AMDGPU_GMC_FAULT_RING_ORDER;
-       uint64_t        key:52;
+       atomic64_t      key;
 };
 
 /*
@@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
                             struct amdgpu_gmc *mc);
 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
                              uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+                                    uint16_t pasid);
 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
index 72962de4c04ca05ddaf9bbefa8594903d0103661..c026972ca9a1a3dc25c75fa5f97cd1471776c3d9 100644 (file)
@@ -24,7 +24,8 @@
 
 #include "amdgpu.h"
 
-static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
 {
        return container_of(man, struct amdgpu_gtt_mgr, manager);
 }
@@ -43,12 +44,14 @@ struct amdgpu_gtt_node {
  * the GTT block, in bytes
  */
 static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+                                             struct device_attribute *attr,
+                                             char *buf)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
-       struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+       struct ttm_resource_manager *man;
 
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
        return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
 }
 
@@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
  * size of the GTT block, in bytes
  */
 static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+                                            struct device_attribute *attr,
+                                            char *buf)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
-       struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+       struct ttm_resource_manager *man;
 
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
        return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
 }
 
@@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
 static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
                   amdgpu_mem_info_gtt_used_show, NULL);
 
-static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
-/**
- * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- * @gtt_size: maximum size of GTT
- *
- * Allocate and initialize the GTT manager.
- */
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
-{
-       struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
-       struct ttm_resource_manager *man = &mgr->manager;
-       uint64_t start, size;
-       int ret;
-
-       man->use_tt = true;
-       man->func = &amdgpu_gtt_mgr_func;
-
-       ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
-
-       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
-       size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
-       drm_mm_init(&mgr->mm, start, size);
-       spin_lock_init(&mgr->lock);
-       atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
-
-       ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
-       if (ret) {
-               DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
-               return ret;
-       }
-       ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
-       if (ret) {
-               DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
-               return ret;
-       }
-
-       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
-       ttm_resource_manager_set_used(man, true);
-       return 0;
-}
-
-/**
- * amdgpu_gtt_mgr_fini - free and destroy GTT manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the GTT manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
-{
-       struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
-       struct ttm_resource_manager *man = &mgr->manager;
-       int ret;
-
-       ttm_resource_manager_set_used(man, false);
-
-       ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
-       if (ret)
-               return;
-
-       spin_lock(&mgr->lock);
-       drm_mm_takedown(&mgr->mm);
-       spin_unlock(&mgr->lock);
-
-       device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
-       device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
-
-       ttm_resource_manager_cleanup(man);
-       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
-}
-
 /**
  * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
  *
@@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
        return (result > 0 ? result : 0) * PAGE_SIZE;
 }
 
+/**
+ * amdgpu_gtt_mgr_recover - re-init gart
+ *
+ * @man: TTM memory type manager
+ *
+ * Re-init the gart for each known BO in the GTT.
+ */
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
 {
        struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
@@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
        .free = amdgpu_gtt_mgr_del,
        .debug = amdgpu_gtt_mgr_debug
 };
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+       struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+       struct ttm_resource_manager *man = &mgr->manager;
+       uint64_t start, size;
+       int ret;
+
+       man->use_tt = true;
+       man->func = &amdgpu_gtt_mgr_func;
+
+       ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
+
+       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+       size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+       drm_mm_init(&mgr->mm, start, size);
+       spin_lock_init(&mgr->lock);
+       atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
+
+       ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
+       if (ret) {
+               DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
+               return ret;
+       }
+       ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
+       if (ret) {
+               DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
+               return ret;
+       }
+
+       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+       ttm_resource_manager_set_used(man, true);
+       return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+       struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+       struct ttm_resource_manager *man = &mgr->manager;
+       int ret;
+
+       ttm_resource_manager_set_used(man, false);
+
+       ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+       if (ret)
+               return;
+
+       spin_lock(&mgr->lock);
+       drm_mm_takedown(&mgr->mm);
+       spin_unlock(&mgr->lock);
+
+       device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
+       device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
+
+       ttm_resource_manager_cleanup(man);
+       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644 (file)
index 0000000..1d50d53
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+{
+       int r;
+       struct ras_ih_if ih_info = {
+               .cb = NULL,
+       };
+       struct ras_fs_if fs_info = {
+               .sysfs_name = "hdp_err_count",
+       };
+
+       if (!adev->hdp.ras_if) {
+               adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+               if (!adev->hdp.ras_if)
+                       return -ENOMEM;
+               adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+               adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+               adev->hdp.ras_if->sub_block_index = 0;
+               strcpy(adev->hdp.ras_if->name, "hdp");
+       }
+       ih_info.head = fs_info.head = *adev->hdp.ras_if;
+       r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+                                &fs_info, &ih_info);
+       if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+               kfree(adev->hdp.ras_if);
+               adev->hdp.ras_if = NULL;
+       }
+
+       return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+{
+       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+           adev->hdp.ras_if) {
+               struct ras_common_if *ras_if = adev->hdp.ras_if;
+               struct ras_ih_if ih_info = {
+                       .cb = NULL,
+               };
+
+               amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+               kfree(ras_if);
+       }
+}
index 43caf9f8cc110a8a561ba6e5a8cbdaf287be7778..7ec99d591584b0137e1a5fa8ad4f99c9d1905720 100644 (file)
 #ifndef __AMDGPU_HDP_H__
 #define __AMDGPU_HDP_H__
 
+struct amdgpu_hdp_ras_funcs {
+       int (*ras_late_init)(struct amdgpu_device *adev);
+       void (*ras_fini)(struct amdgpu_device *adev);
+       void (*query_ras_error_count)(struct amdgpu_device *adev,
+                                     void *ras_error_status);
+       void (*reset_ras_error_count)(struct amdgpu_device *adev);
+};
+
 struct amdgpu_hdp_funcs {
        void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
        void (*invalidate_hdp)(struct amdgpu_device *adev,
                               struct amdgpu_ring *ring);
-       void (*reset_ras_error_count)(struct amdgpu_device *adev);
        void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
        void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
        void (*init_registers)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_hdp {
+       struct ras_common_if                    *ras_if;
        const struct amdgpu_hdp_funcs           *funcs;
+       const struct amdgpu_hdp_ras_funcs       *ras_funcs;
 };
 
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
 #endif /* __AMDGPU_HDP_H__ */
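For reference, a minimal sketch (not part of this patch) of how an ASIC-specific HDP block might plug into the new RAS hooks: the amdgpu_hdp_ras_funcs layout and the two generic helpers come from the hunks above, while the hdp_vX_Y_* names and the callback bodies are illustrative placeholders.

/* Hypothetical ASIC glue -- only the struct layout and the two
 * amdgpu_hdp_ras_* helpers are defined by this patch.
 */
static void hdp_vX_Y_query_ras_error_count(struct amdgpu_device *adev,
                                           void *ras_error_status)
{
        /* read the HDP error counters and fill *ras_error_status here */
}

static void hdp_vX_Y_reset_ras_error_count(struct amdgpu_device *adev)
{
        /* clear the HDP error counters here */
}

const struct amdgpu_hdp_ras_funcs hdp_vX_Y_ras_funcs = {
        .ras_late_init = amdgpu_hdp_ras_late_init,
        .ras_fini = amdgpu_hdp_ras_fini,
        .query_ras_error_count = hdp_vX_Y_query_ras_error_count,
        .reset_ras_error_count = hdp_vX_Y_reset_ras_error_count,
};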
index a2fe2dac32c16f1f0d35e111b2a70cf04dcb18b8..2e6789a7dc464725e282a36799959d5e1ca95820 100644 (file)
@@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 
        for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
                if (i == AMDGPU_IB_POOL_DIRECT)
-                       size = PAGE_SIZE * 2;
+                       size = PAGE_SIZE * 6;
                else
                        size = AMDGPU_IB_POOL_SIZE;
 
index faaa6aa2faaf2aba9c32bdd4497c985cbfbc4571..a36e191cf0860e3c907b6fdd1d13aa8ae4bced61 100644 (file)
@@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
                cur_rptr += ih->ptr_mask + 1;
        *prev_rptr = cur_rptr;
 
-       return cur_rptr >= checkpoint_wptr;
+       /* check if the ring is empty to work around the missing wptr overflow flag */
+       return cur_rptr >= checkpoint_wptr ||
+              (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
 }
 
 /**
index 39ee88d29cca1f3d0d06dfd1c487172820ec4362..8d12e474745addf0eb62695a72da5a825ae23040 100644 (file)
@@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
                if (!ras)
                        return -EINVAL;
-               ras_mask = (uint64_t)ras->supported << 32 | ras->features;
+               ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
 
                return copy_to_user(out, &ras_mask,
                                min_t(u64, size, sizeof(ras_mask))) ?
@@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
                dev_warn(adev->dev, "No more PASIDs available!");
                pasid = 0;
        }
-       r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+
+       r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
        if (r)
                goto error_pasid;
 
index 11aa29933c1f8c968101bb8b0b09a2f8f2251f68..b27fcbccce2b6f27dcb70ec99527323db075863d 100644 (file)
@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
                                      void *ras_error_status);
        void (*query_ras_error_status)(struct amdgpu_device *adev);
        void (*reset_ras_error_count)(struct amdgpu_device *adev);
+       void (*reset_ras_error_status)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_mmhub_funcs {
index 828b5167ff128f0a2f1c17078267bf0a03a18818..2741c28ff1b56ef7daeac8849582b8ac441d65d1 100644 (file)
@@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
        mmu_interval_notifier_remove(&bo->notifier);
        bo->notifier.mm = NULL;
 }
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+                              struct mm_struct *mm, struct page **pages,
+                              uint64_t start, uint64_t npages,
+                              struct hmm_range **phmm_range, bool readonly,
+                              bool mmap_locked)
+{
+       struct hmm_range *hmm_range;
+       unsigned long timeout;
+       unsigned long i;
+       unsigned long *pfns;
+       int r = 0;
+
+       hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+       if (unlikely(!hmm_range))
+               return -ENOMEM;
+
+       pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+       if (unlikely(!pfns)) {
+               r = -ENOMEM;
+               goto out_free_range;
+       }
+
+       hmm_range->notifier = notifier;
+       hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+       if (!readonly)
+               hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+       hmm_range->hmm_pfns = pfns;
+       hmm_range->start = start;
+       hmm_range->end = start + npages * PAGE_SIZE;
+
+       /* Assume 512MB takes a maximum of 1 second to fault in page addresses */
+       timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
+       timeout = jiffies + msecs_to_jiffies(timeout);
+
+retry:
+       hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+       if (likely(!mmap_locked))
+               mmap_read_lock(mm);
+
+       r = hmm_range_fault(hmm_range);
+
+       if (likely(!mmap_locked))
+               mmap_read_unlock(mm);
+       if (unlikely(r)) {
+               /*
+                * FIXME: This timeout should encompass the retry from
+                * mmu_interval_read_retry() as well.
+                */
+               if (r == -EBUSY && !time_after(jiffies, timeout))
+                       goto retry;
+               goto out_free_pfns;
+       }
+
+       /*
+        * Due to default_flags, all pages are HMM_PFN_VALID or
+        * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+        * the notifier_lock, and mmu_interval_read_retry() must be done first.
+        */
+       for (i = 0; pages && i < npages; i++)
+               pages[i] = hmm_pfn_to_page(pfns[i]);
+
+       *phmm_range = hmm_range;
+
+       return 0;
+
+out_free_pfns:
+       kvfree(pfns);
+out_free_range:
+       kfree(hmm_range);
+
+       return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+       int r;
+
+       r = mmu_interval_read_retry(hmm_range->notifier,
+                                   hmm_range->notifier_seq);
+       kvfree(hmm_range->hmm_pfns);
+       kfree(hmm_range);
+
+       return r;
+}
index a292238f75ebaefcc7d3a8feef43d2479d59de71..7f7d37a457c32c2241d73b4267f5b6d26bce0b2c 100644 (file)
 #include <linux/workqueue.h>
 #include <linux/interval_tree.h>
 
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+                              struct mm_struct *mm, struct page **pages,
+                              uint64_t start, uint64_t npages,
+                              struct hmm_range **phmm_range, bool readonly,
+                              bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
 #if defined(CONFIG_HMM_MIRROR)
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
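A hedged usage sketch of the two helpers declared above, mirroring the pattern amdgpu_ttm_tt_get_user_pages() switches to later in this series; the example_* wrapper, its arguments, and the page-table step are assumptions for illustration only.

/* Hypothetical caller of the new HMM helpers; only the two amdgpu_hmm_*
 * functions come from this patch.
 */
static int example_map_user_range(struct mmu_interval_notifier *notifier,
                                  struct mm_struct *mm, struct page **pages,
                                  uint64_t start, uint64_t npages)
{
        struct hmm_range *range;
        int r;

        /* Fault the range in writable (readonly = false); the mmap lock is
         * taken internally because mmap_locked is false.
         */
        r = amdgpu_hmm_range_get_pages(notifier, mm, pages, start, npages,
                                       &range, false, false);
        if (r)
                return r;

        /* ... consume pages[] under the notifier lock ... */

        /* A non-zero return means the range was invalidated concurrently and
         * the whole sequence needs to be retried.
         */
        return amdgpu_hmm_range_get_pages_done(range);
}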
index 0adffcace3263522521b9122579f6ba9f43bc075..8714d50c5b20280a70e6ca6418b05f99bb250397 100644 (file)
@@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
 #endif
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
                               struct amdgpu_bo_param *bp,
                               struct amdgpu_bo **bo_ptr)
 {
@@ -601,9 +612,9 @@ fail_unreserve:
        return r;
 }
 
-static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
-                                  unsigned long size,
-                                  struct amdgpu_bo *bo)
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+                           unsigned long size,
+                           struct amdgpu_bo *bo)
 {
        struct amdgpu_bo_param bp;
        int r;
@@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
        memset(&bp, 0, sizeof(bp));
        bp.size = size;
        bp.domain = AMDGPU_GEM_DOMAIN_GTT;
-       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-               AMDGPU_GEM_CREATE_SHADOW;
+       bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
        bp.type = ttm_bo_type_kernel;
        bp.resv = bo->tbo.base.resv;
        bp.bo_ptr_size = sizeof(struct amdgpu_bo);
 
-       r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
+       r = amdgpu_bo_create(adev, &bp, &bo->shadow);
        if (!r) {
                bo->shadow->parent = amdgpu_bo_ref(bo);
                mutex_lock(&adev->shadow_list_lock);
@@ -631,50 +641,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
        return r;
 }
 
-/**
- * amdgpu_bo_create - create an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @bp: parameters to be used for the buffer object
- * @bo_ptr: pointer to the buffer object pointer
- *
- * Creates an &amdgpu_bo buffer object; and if requested, also creates a
- * shadow object.
- * Shadow object is used to backup the original buffer object, and is always
- * in GTT.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
-                    struct amdgpu_bo_param *bp,
-                    struct amdgpu_bo **bo_ptr)
-{
-       u64 flags = bp->flags;
-       int r;
-
-       bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
-
-       r = amdgpu_bo_do_create(adev, bp, bo_ptr);
-       if (r)
-               return r;
-
-       if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
-               if (!bp->resv)
-                       WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
-                                                       NULL));
-
-               r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
-
-               if (!bp->resv)
-                       dma_resv_unlock((*bo_ptr)->tbo.base.resv);
-
-               if (r)
-                       amdgpu_bo_unref(bo_ptr);
-       }
-
-       return r;
-}
-
 /**
  * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
  * @adev: amdgpu device object
@@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
        struct amdgpu_bo *bo_ptr;
        int r;
 
-       bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
        bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
-       r = amdgpu_bo_do_create(adev, bp, &bo_ptr);
+       r = amdgpu_bo_create(adev, bp, &bo_ptr);
        if (r)
                return r;
 
@@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
        amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
        amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
        amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
-       amdgpu_bo_print_flag(m, bo, SHADOW);
        amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
        amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
        amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
index b37d36ac6b5a439e24a512820a25654107db4092..46d22ab85492cec79ab4670078add98171e8a51b 100644 (file)
 #define AMDGPU_BO_INVALID_OFFSET       LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS       3
 
+/* BO flags to indicate a KFD userptr BO and a KFD SVM BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO        (1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO    (1ULL << 62)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 
 struct amdgpu_bo_param {
@@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
                          struct amdgpu_bo_user **ubo_ptr);
 void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
                           void **cpu_addr);
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+                           unsigned long size,
+                           struct amdgpu_bo *bo);
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
 void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
 void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
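Since amdgpu_bo_create() no longer creates shadow BOs implicitly, callers that still want one are expected to create it explicitly, as the amdgpu_vm changes later in this patch do. A minimal sketch, assuming bp is an already initialized amdgpu_bo_param:

/* Illustrative only -- mirrors the explicit-shadow pattern used by
 * amdgpu_vm_pt_create() in this patch.
 */
static int example_create_bo_with_shadow(struct amdgpu_device *adev,
                                         struct amdgpu_bo_param *bp,
                                         struct amdgpu_bo **bo)
{
        int r;

        r = amdgpu_bo_create(adev, bp, bo);
        if (r)
                return r;

        /* The shadow is created under the BO's reservation lock. */
        if (!bp->resv)
                WARN_ON(dma_resv_lock((*bo)->tbo.base.resv, NULL));

        r = amdgpu_bo_create_shadow(adev, bp->size, *bo);

        if (!bp->resv)
                dma_resv_unlock((*bo)->tbo.base.resv);

        if (r)
                amdgpu_bo_unref(bo);

        return r;
}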
index a09483beb9685042914cca661f85b8ca12948bcd..f7bbb04d01eece005c0523914f1c437c5ed938d6 100644 (file)
@@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp)
        return ret;
 }
 
-static int psp_clear_vf_fw(struct psp_context *psp)
-{
-       int ret;
-       struct psp_gfx_cmd_resp *cmd;
-
-       if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
-               return 0;
-
-       cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
-       if (!cmd)
-               return -ENOMEM;
-
-       cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
-
-       ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
-       kfree(cmd);
-
-       return ret;
-}
-
 static bool psp_skip_tmr(struct psp_context *psp)
 {
        switch (psp->adev->asic_type) {
        case CHIP_NAVI12:
        case CHIP_SIENNA_CICHLID:
+       case CHIP_ALDEBARAN:
                return true;
        default:
                return false;
@@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp)
        memset(psp->fw_pri_buf, 0, PSP_1_MEG);
        memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
 
+       ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+
+       if (psp->adev->gmc.xgmi.connected_to_cpu)
+               ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+       else
+               ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+
        psp_prep_ta_load_cmd_buf(cmd,
                                 psp->fw_pri_mc_addr,
                                 psp->ta_ras_ucode_size,
@@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp)
        ret = psp_cmd_submit_buf(psp, NULL, cmd,
                        psp->fence_buf_mc_addr);
 
-       ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
-
        if (!ret) {
                psp->ras.session_id = cmd->resp.session_id;
 
@@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
        return ret;
 }
 
+static int psp_ras_status_to_errno(struct amdgpu_device *adev,
+                                        enum ta_ras_status ras_status)
+{
+       int ret = -EINVAL;
+
+       switch (ras_status) {
+       case TA_RAS_STATUS__SUCCESS:
+               ret = 0;
+               break;
+       case TA_RAS_STATUS__RESET_NEEDED:
+               ret = -EAGAIN;
+               break;
+       case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
+               dev_warn(adev->dev, "RAS WARN: ras function unavailable\n");
+               break;
+       case TA_RAS_STATUS__ERROR_ASD_READ_WRITE:
+               dev_warn(adev->dev, "RAS WARN: asd read or write failed\n");
+               break;
+       default:
+               dev_err(adev->dev, "RAS ERROR: ras function failed, status 0x%X\n", ras_status);
+       }
+
+       return ret;
+}
+
 int psp_ras_enable_features(struct psp_context *psp,
                union ta_ras_cmd_input *info, bool enable)
 {
@@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp,
        if (ret)
                return -EINVAL;
 
-       return ras_cmd->ras_status;
+       return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
 }
 
 static int psp_ras_terminate(struct psp_context *psp)
@@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp,
        if (amdgpu_ras_intr_triggered())
                return 0;
 
-       return ras_cmd->ras_status;
+       return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
 }
 // ras end
 
@@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp)
                return ret;
        }
 
-       ret = psp_clear_vf_fw(psp);
-       if (ret) {
-               DRM_ERROR("PSP clear vf fw!\n");
-               return ret;
-       }
-
        ret = psp_boot_config_set(adev);
        if (ret) {
                DRM_WARN("PSP set boot config@\n");
@@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
                return 0;
 
        if ((amdgpu_in_reset(adev) &&
-            ras && ras->supported &&
+            ras && adev->ras_enabled &&
             (adev->asic_type == CHIP_ARCTURUS ||
              adev->asic_type == CHIP_VEGA20)) ||
             (adev->in_runpm &&
@@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct psp_context *psp = &adev->psp;
-       int ret;
 
        if (psp->adev->psp.ta_fw) {
                psp_ras_terminate(psp);
@@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle)
        }
 
        psp_asd_unload(psp);
-       ret = psp_clear_vf_fw(psp);
-       if (ret) {
-               DRM_ERROR("PSP clear vf fw!\n");
-               return ret;
-       }
 
        psp_tmr_terminate(psp);
        psp_ring_destroy(psp, PSP_RING_TYPE__KM);
index b0d2fc9454caadb0d5e5410a63022406c01ac835..b1c57a5b6e898215f764bf09b652686f87277559 100644 (file)
@@ -33,6 +33,7 @@
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "atom.h"
 
 static const char *RAS_FS_NAME = "ras";
 
@@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
  * "disable" requires only the block.
  * "enable" requires the block and error type.
  * "inject" requires the block, error type, address, and value.
+ *
  * The block is one of: umc, sdma, gfx, etc.
  *     see ras_block_string[] for details
+ *
  * The error type is one of: ue, ce, where,
  *     ue is multi-uncorrectable
  *     ce is single-correctable
+ *
 * The sub-block is the sub-block index, pass 0 if there is no sub-block.
  * The address and value are hexadecimal numbers, leading 0x is optional.
  *
@@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return NULL;
 
        if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
@@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
        struct ras_manager *obj;
        int i;
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return NULL;
 
        if (head) {
@@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 }
 /* obj end */
 
-static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
-                                        const char* invoke_type,
-                                        const char* block_name,
-                                        enum ta_ras_status ret)
-{
-       switch (ret) {
-       case TA_RAS_STATUS__SUCCESS:
-               return;
-       case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
-               dev_warn(adev->dev,
-                       "RAS WARN: %s %s currently unavailable\n",
-                       invoke_type,
-                       block_name);
-               break;
-       default:
-               dev_err(adev->dev,
-                       "RAS ERROR: %s %s error failed ret 0x%X\n",
-                       invoke_type,
-                       block_name,
-                       ret);
-       }
-}
-
 /* feature ctl begin */
 static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
-               struct ras_common_if *head)
+                                        struct ras_common_if *head)
 {
-       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
-       return con->hw_supported & BIT(head->block);
+       return adev->ras_hw_enabled & BIT(head->block);
 }
 
 static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
                con->features |= BIT(head->block);
        } else {
                if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
-                       /* skip clean gfx ras context feature for VEGA20 Gaming.
-                        * will clean later
-                        */
-                       if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
-                               con->features &= ~BIT(head->block);
+                       con->features &= ~BIT(head->block);
                        put_obj(obj);
                }
        }
@@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        if (!amdgpu_ras_intr_triggered()) {
                ret = psp_ras_enable_features(&adev->psp, info, enable);
                if (ret) {
-                       amdgpu_ras_parse_status_code(adev,
-                                                    enable ? "enable":"disable",
-                                                    ras_block_str(head->block),
-                                                   (enum ta_ras_status)ret);
-                       if (ret == TA_RAS_STATUS__RESET_NEEDED)
-                               ret = -EAGAIN;
-                       else
-                               ret = -EINVAL;
-
+                       dev_err(adev->dev, "ras %s %s failed %d\n",
+                               enable ? "enable":"disable",
+                               ras_block_str(head->block),
+                               ret);
                        goto out;
                }
        }
@@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
                                con->features |= BIT(head->block);
 
                        ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+                       /* clean gfx block ras features flag */
+                       if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+                               con->features &= ~BIT(head->block);
                }
        } else
                ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                    adev->gmc.xgmi.ras_funcs->query_ras_error_count)
                        adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
                break;
+       case AMDGPU_RAS_BLOCK__HDP:
+               if (adev->hdp.ras_funcs &&
+                   adev->hdp.ras_funcs->query_ras_error_count)
+                       adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+               break;
        default:
                break;
        }
@@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
        info->ce_count = obj->err_data.ce_count;
 
        if (err_data.ce_count) {
-               dev_info(adev->dev, "%ld correctable hardware errors "
+               if (adev->smuio.funcs &&
+                   adev->smuio.funcs->get_socket_id &&
+                   adev->smuio.funcs->get_die_id) {
+                       dev_info(adev->dev, "socket: %d, die: %d "
+                                       "%ld correctable hardware errors "
+                                       "detected in %s block, no user "
+                                       "action is needed.\n",
+                                       adev->smuio.funcs->get_socket_id(adev),
+                                       adev->smuio.funcs->get_die_id(adev),
+                                       obj->err_data.ce_count,
+                                       ras_block_str(info->head.block));
+               } else {
+                       dev_info(adev->dev, "%ld correctable hardware errors "
                                        "detected in %s block, no user "
                                        "action is needed.\n",
                                        obj->err_data.ce_count,
                                        ras_block_str(info->head.block));
+               }
        }
        if (err_data.ue_count) {
-               dev_info(adev->dev, "%ld uncorrectable hardware errors "
+               if (adev->smuio.funcs &&
+                   adev->smuio.funcs->get_socket_id &&
+                   adev->smuio.funcs->get_die_id) {
+                       dev_info(adev->dev, "socket: %d, die: %d "
+                                       "%ld uncorrectable hardware errors "
+                                       "detected in %s block\n",
+                                       adev->smuio.funcs->get_socket_id(adev),
+                                       adev->smuio.funcs->get_die_id(adev),
+                                       obj->err_data.ue_count,
+                                       ras_block_str(info->head.block));
+               } else {
+                       dev_info(adev->dev, "%ld uncorrectable hardware errors "
                                        "detected in %s block\n",
                                        obj->err_data.ue_count,
                                        ras_block_str(info->head.block));
+               }
        }
 
        return 0;
@@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
                if (adev->mmhub.ras_funcs &&
                    adev->mmhub.ras_funcs->reset_ras_error_count)
                        adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+
+               if (adev->mmhub.ras_funcs &&
+                   adev->mmhub.ras_funcs->reset_ras_error_status)
+                       adev->mmhub.ras_funcs->reset_ras_error_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__SDMA:
                if (adev->sdma.funcs->reset_ras_error_count)
                        adev->sdma.funcs->reset_ras_error_count(adev);
                break;
+       case AMDGPU_RAS_BLOCK__HDP:
+               if (adev->hdp.ras_funcs &&
+                   adev->hdp.ras_funcs->reset_ras_error_count)
+                       adev->hdp.ras_funcs->reset_ras_error_count(adev);
+               break;
        default:
                break;
        }
@@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                ret = -EINVAL;
        }
 
-       amdgpu_ras_parse_status_code(adev,
-                                    "inject",
-                                    ras_block_str(info->head.block),
-                                    (enum ta_ras_status)ret);
+       if (ret)
+               dev_err(adev->dev, "ras inject %s failed %d\n",
+                       ras_block_str(info->head.block), ret);
 
        return ret;
 }
@@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
        struct ras_manager *obj;
        struct ras_err_data data = {0, 0};
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return 0;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1265,8 +1277,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct dentry *dir;
-       struct drm_minor *minor = adev_to_drm(adev)->primary;
+       struct drm_minor  *minor = adev_to_drm(adev)->primary;
+       struct dentry     *dir;
 
        dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
        debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
@@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
                            &amdgpu_ras_debugfs_eeprom_ops);
        debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
                           &con->bad_page_cnt_threshold);
+       debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+       debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
 
        /*
         * After one uncorrectable error happens, usually GPU recovery will
@@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
        bool exc_err_limit = false;
        int ret;
 
-       if (adev->ras_features && con)
+       if (adev->ras_enabled && con)
                data = &con->eh_data;
        else
                return 0;
@@ -2028,6 +2042,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
                adev->asic_type == CHIP_SIENNA_CICHLID;
 }
 
+/*
+ * This is a workaround for the Vega20 workstation SKU:
+ * force enable GFX RAS and ignore the vbios GFX RAS flag,
+ * because GC EDC cannot be written.
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
+{
+       struct atom_context *ctx = adev->mode_info.atom_context;
+
+       if (!ctx)
+               return;
+
+       if (strnstr(ctx->vbios_version, "D16406",
+                   sizeof(ctx->vbios_version)))
+               adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initialization and
@@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
  * we have to initialize ras as normal. but need check if operation is
  * allowed or not in each function.
  */
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
-               uint32_t *hw_supported, uint32_t *supported)
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
-       *hw_supported = 0;
-       *supported = 0;
+       adev->ras_hw_enabled = adev->ras_enabled = 0;
 
        if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
            !amdgpu_ras_asic_supported(adev))
@@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
        if (!adev->gmc.xgmi.connected_to_cpu) {
                if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
                        dev_info(adev->dev, "MEM ECC is active.\n");
-                       *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
-                                       1 << AMDGPU_RAS_BLOCK__DF);
+                       adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+                                                  1 << AMDGPU_RAS_BLOCK__DF);
                } else {
                        dev_info(adev->dev, "MEM ECC is not presented.\n");
                }
 
                if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
                        dev_info(adev->dev, "SRAM ECC is active.\n");
-                       *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
-                                       1 << AMDGPU_RAS_BLOCK__DF);
+                       adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+                                                   1 << AMDGPU_RAS_BLOCK__DF);
                } else {
                        dev_info(adev->dev, "SRAM ECC is not presented.\n");
                }
        } else {
                /* driver only manages a few IP blocks RAS feature
                 * when GPU is connected cpu through XGMI */
-               *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
-                               1 << AMDGPU_RAS_BLOCK__SDMA |
-                               1 << AMDGPU_RAS_BLOCK__MMHUB);
+               adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+                                          1 << AMDGPU_RAS_BLOCK__SDMA |
+                                          1 << AMDGPU_RAS_BLOCK__MMHUB);
        }
 
+       amdgpu_ras_get_quirks(adev);
+
        /* hw_supported needs to be aligned with RAS block mask. */
-       *hw_supported &= AMDGPU_RAS_BLOCK_MASK;
+       adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
 
-       *supported = amdgpu_ras_enable == 0 ?
-                       0 : *hw_supported & amdgpu_ras_mask;
-       adev->ras_features = *supported;
+       adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+               adev->ras_hw_enabled & amdgpu_ras_mask;
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
        amdgpu_ras_set_context(adev, con);
 
-       amdgpu_ras_check_supported(adev, &con->hw_supported,
-                       &con->supported);
-       if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+       amdgpu_ras_check_supported(adev);
+
+       if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
                /* set gfx block ras context feature for VEGA20 Gaming
                 * send ras disable cmd to ras ta during ras late init.
                 */
-               if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
+               if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
                        con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
 
                        return 0;
@@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        }
 
        dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
-                       "hardware ability[%x] ras_mask[%x]\n",
-                       con->hw_supported, con->supported);
+                "hardware ability[%x] ras_mask[%x]\n",
+                adev->ras_hw_enabled, adev->ras_enabled);
+
        return 0;
 release_con:
        amdgpu_ras_set_context(adev, NULL);
@@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj, *tmp;
 
-       if (!adev->ras_features || !con) {
+       if (!adev->ras_enabled || !con) {
                /* clean ras context for VEGA20 Gaming after send ras disable cmd */
                amdgpu_release_ras_context(adev);
 
@@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return;
 
        amdgpu_ras_disable_all_features(adev, 0);
@@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return 0;
 
        /* Need disable ras on all IPs here before ip [hw/sw]fini */
@@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!adev->ras_features || !con)
+       if (!adev->ras_enabled || !con)
                return 0;
 
        amdgpu_ras_fs_fini(adev);
@@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
-       uint32_t hw_supported, supported;
-
-       amdgpu_ras_check_supported(adev, &hw_supported, &supported);
-       if (!hw_supported)
+       amdgpu_ras_check_supported(adev);
+       if (!adev->ras_hw_enabled)
                return;
 
        if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
        if (!con)
                return;
 
-       if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+       if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
                con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
                amdgpu_ras_set_context(adev, NULL);
                kfree(con);
index 60df268a0c66d25f4e6a159e5186d0e639b81821..201fbdee1d090ad94ec1a973fd108f8a4af91181 100644 (file)
@@ -313,9 +313,6 @@ struct ras_common_if {
 struct amdgpu_ras {
        /* ras infrastructure */
        /* for ras itself. */
-       uint32_t hw_supported;
-       /* for IP to check its ras ability. */
-       uint32_t supported;
        uint32_t features;
        struct list_head head;
        /* sysfs */
@@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 
        if (block >= AMDGPU_RAS_BLOCK_COUNT)
                return 0;
-       return ras && (ras->supported & (1 << block));
+       return ras && (adev->ras_enabled & (1 << block));
 }
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
index 688624ebe42110c314a4f4e551cf96509919d6f7..7b634a1517f9c1e90c0f8c61f4e8d9735fe279e5 100644 (file)
@@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
  * @irq_src: interrupt source to use for this ring
  * @irq_type: interrupt type to use for this ring
  * @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
  *
  * Initialize the driver information for the selected ring (all asics).
  * Returns 0 on success, error on failure.
index b860ec913ac5c1e5cb0781f9fb869c9ed321d5d8..484bb3dcec473eb2defd036692fc745caa91bf5f 100644 (file)
@@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs {
        void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
        void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
        u32 (*get_die_id)(struct amdgpu_device *adev);
+       u32 (*get_socket_id)(struct amdgpu_device *adev);
        bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
 };
 
index 8c7ec09eb1a4f93e0d98c948948c6f427f5fa6a1..10391fcff3437bca5ff0b802b702f73b2edea6a7 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
-#include <linux/hmm.h>
 #include <linux/pagemap.h>
 #include <linux/sched/task.h>
 #include <linux/sched/mm.h>
@@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
        }
 
        abo = ttm_to_amdgpu_bo(bo);
+       if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+               struct dma_fence *fence;
+               struct dma_resv *resv = &bo->base._resv;
+
+               rcu_read_lock();
+               fence = rcu_dereference(resv->fence_excl);
+               if (fence && !fence->ops->signaled)
+                       dma_fence_enable_sw_signaling(fence);
+
+               placement->num_placement = 0;
+               placement->num_busy_placement = 0;
+               rcu_read_unlock();
+               return;
+       }
        switch (bo->mem.mem_type) {
        case AMDGPU_PL_GDS:
        case AMDGPU_PL_GWS:
@@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
-       /*
-        * Don't verify access for KFD BOs. They don't have a GEM
-        * object associated with them.
-        */
-       if (abo->kfd_bo)
-               return 0;
-
        if (amdgpu_ttm_tt_get_usermm(bo->ttm))
                return -EPERM;
        return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
@@ -288,7 +294,7 @@ error_free:
 }
 
 /**
- * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
  * @adev: amdgpu device
  * @src: buffer/address where to read from
  * @dst: buffer/address where to write to
@@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        unsigned long start = gtt->userptr;
        struct vm_area_struct *vma;
-       struct hmm_range *range;
-       unsigned long timeout;
        struct mm_struct *mm;
-       unsigned long i;
+       bool readonly;
        int r = 0;
 
        mm = bo->notifier.mm;
@@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
        if (!mmget_not_zero(mm)) /* Happens during process shutdown */
                return -ESRCH;
 
-       range = kzalloc(sizeof(*range), GFP_KERNEL);
-       if (unlikely(!range)) {
-               r = -ENOMEM;
-               goto out;
-       }
-       range->notifier = &bo->notifier;
-       range->start = bo->notifier.interval_tree.start;
-       range->end = bo->notifier.interval_tree.last + 1;
-       range->default_flags = HMM_PFN_REQ_FAULT;
-       if (!amdgpu_ttm_tt_is_readonly(ttm))
-               range->default_flags |= HMM_PFN_REQ_WRITE;
-
-       range->hmm_pfns = kvmalloc_array(ttm->num_pages,
-                                        sizeof(*range->hmm_pfns), GFP_KERNEL);
-       if (unlikely(!range->hmm_pfns)) {
-               r = -ENOMEM;
-               goto out_free_ranges;
-       }
-
        mmap_read_lock(mm);
        vma = find_vma(mm, start);
+       mmap_read_unlock(mm);
        if (unlikely(!vma || start < vma->vm_start)) {
                r = -EFAULT;
-               goto out_unlock;
+               goto out_putmm;
        }
        if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
                vma->vm_file)) {
                r = -EPERM;
-               goto out_unlock;
-       }
-       mmap_read_unlock(mm);
-       timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-
-retry:
-       range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
-
-       mmap_read_lock(mm);
-       r = hmm_range_fault(range);
-       mmap_read_unlock(mm);
-       if (unlikely(r)) {
-               /*
-                * FIXME: This timeout should encompass the retry from
-                * mmu_interval_read_retry() as well.
-                */
-               if (r == -EBUSY && !time_after(jiffies, timeout))
-                       goto retry;
-               goto out_free_pfns;
+               goto out_putmm;
        }
 
-       /*
-        * Due to default_flags, all pages are HMM_PFN_VALID or
-        * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
-        * the notifier_lock, and mmu_interval_read_retry() must be done first.
-        */
-       for (i = 0; i < ttm->num_pages; i++)
-               pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-
-       gtt->range = range;
+       readonly = amdgpu_ttm_tt_is_readonly(ttm);
+       r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
+                                      ttm->num_pages, &gtt->range, readonly,
+                                      false);
+out_putmm:
        mmput(mm);
 
-       return 0;
-
-out_unlock:
-       mmap_read_unlock(mm);
-out_free_pfns:
-       kvfree(range->hmm_pfns);
-out_free_ranges:
-       kfree(range);
-out:
-       mmput(mm);
        return r;
 }
 
@@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
                 * FIXME: Must always hold notifier_lock for this, and must
                 * not ignore the return code.
                 */
-               r = mmu_interval_read_retry(gtt->range->notifier,
-                                        gtt->range->notifier_seq);
-               kvfree(gtt->range->hmm_pfns);
-               kfree(gtt->range);
+               r = amdgpu_hmm_range_get_pages_done(gtt->range);
                gtt->range = NULL;
        }
 
index 0c9c5255aa429742ce61a22d55f6004a9783c650..a57842689d428d411ecf900f9415c9b9e188da02 100644 (file)
@@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
        struct drm_device *ddev = adev_to_drm(adev);
 
        /* enable virtual display */
-       if (adev->mode_info.num_crtc == 0)
-               adev->mode_info.num_crtc = 1;
-       adev->enable_virtual_display = true;
+       if (adev->asic_type != CHIP_ALDEBARAN &&
+           adev->asic_type != CHIP_ARCTURUS) {
+               if (adev->mode_info.num_crtc == 0)
+                       adev->mode_info.num_crtc = 1;
+               adev->enable_virtual_display = true;
+       }
        ddev->driver_features &= ~DRIVER_ATOMIC;
        adev->cg_flags = 0;
        adev->pg_flags = 0;
@@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
                case CHIP_VEGA10:
                case CHIP_VEGA20:
                case CHIP_ARCTURUS:
+               case CHIP_ALDEBARAN:
                        soc15_set_virt_ops(adev);
                        break;
                case CHIP_NAVI10:
index 4a3e3f72e12774bfd7ec951d5a17d2d6d7b50918..edc63d3e087ec3c845c051142d4fd46e4ddf3af9 100644 (file)
@@ -38,6 +38,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "kfd_svm.h"
 
 /**
  * DOC: GPUVM
@@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
+ * amdgpu_vm_pt_create - create bo for PD/PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requesting vm
  * @level: the page table level
  * @immediate: use a immediate update
- * @bp: resulting BO allocation parameters
+ * @bo: pointer to the buffer object pointer
  */
-static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
+                              struct amdgpu_vm *vm,
                               int level, bool immediate,
-                              struct amdgpu_bo_param *bp)
+                              struct amdgpu_bo **bo)
 {
-       memset(bp, 0, sizeof(*bp));
+       struct amdgpu_bo_param bp;
+       int r;
 
-       bp->size = amdgpu_vm_bo_size(adev, level);
-       bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
-       bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
-       bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
-       bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+       memset(&bp, 0, sizeof(bp));
+
+       bp.size = amdgpu_vm_bo_size(adev, level);
+       bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
+       bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
                AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-       bp->bo_ptr_size = sizeof(struct amdgpu_bo);
+       bp.bo_ptr_size = sizeof(struct amdgpu_bo);
        if (vm->use_cpu_for_update)
-               bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-       else if (!vm->root.base.bo || vm->root.base.bo->shadow)
-               bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
-       bp->type = ttm_bo_type_kernel;
-       bp->no_wait_gpu = immediate;
+               bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+       bp.type = ttm_bo_type_kernel;
+       bp.no_wait_gpu = immediate;
        if (vm->root.base.bo)
-               bp->resv = vm->root.base.bo->tbo.base.resv;
+               bp.resv = vm->root.base.bo->tbo.base.resv;
+
+       r = amdgpu_bo_create(adev, &bp, bo);
+       if (r)
+               return r;
+
+       if (vm->is_compute_context && (adev->flags & AMD_IS_APU))
+               return 0;
+
+       if (!bp.resv)
+               WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
+                                     NULL));
+       r = amdgpu_bo_create_shadow(adev, bp.size, *bo);
+
+       if (!bp.resv)
+               dma_resv_unlock((*bo)->tbo.base.resv);
+
+       if (r) {
+               amdgpu_bo_unref(bo);
+               return r;
+       }
+
+       return 0;
 }
 
 /**
@@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                               bool immediate)
 {
        struct amdgpu_vm_pt *entry = cursor->entry;
-       struct amdgpu_bo_param bp;
        struct amdgpu_bo *pt;
        int r;
 
@@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
        if (entry->base.bo)
                return 0;
 
-       amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
-
-       r = amdgpu_bo_create(adev, &bp, &pt);
+       r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
        if (r)
                return r;
 
@@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-                                      struct amdgpu_device *bo_adev,
-                                      struct amdgpu_vm *vm, bool immediate,
-                                      bool unlocked, struct dma_resv *resv,
-                                      uint64_t start, uint64_t last,
-                                      uint64_t flags, uint64_t offset,
-                                      struct drm_mm_node *nodes,
-                                      dma_addr_t *pages_addr,
-                                      struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+                               struct amdgpu_device *bo_adev,
+                               struct amdgpu_vm *vm, bool immediate,
+                               bool unlocked, struct dma_resv *resv,
+                               uint64_t start, uint64_t last,
+                               uint64_t flags, uint64_t offset,
+                               struct drm_mm_node *nodes,
+                               dma_addr_t *pages_addr,
+                               struct dma_fence **fence)
 {
        struct amdgpu_vm_update_params params;
        enum amdgpu_sync_mode sync_mode;
@@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @vm_context: Indicates if it GFX or Compute context
  * @pasid: Process address space identifier
  *
  * Init @vm fields.
@@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                  int vm_context, u32 pasid)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
 {
-       struct amdgpu_bo_param bp;
        struct amdgpu_bo *root;
        int r, i;
 
@@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        vm->pte_support_ats = false;
        vm->is_compute_context = false;
 
-       if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
-               vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
-                                               AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+       vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+                                   AMDGPU_VM_USE_CPU_FOR_GFX);
 
-               if (adev->asic_type == CHIP_RAVEN)
-                       vm->pte_support_ats = true;
-       } else {
-               vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
-                                               AMDGPU_VM_USE_CPU_FOR_GFX);
-       }
        DRM_DEBUG_DRIVER("VM update mode is %s\n",
                         vm->use_cpu_for_update ? "CPU" : "SDMA");
        WARN_ONCE((vm->use_cpu_for_update &&
@@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        mutex_init(&vm->eviction_lock);
        vm->evicting = false;
 
-       amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
-       if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
-               bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
-       r = amdgpu_bo_create(adev, &bp, &root);
+       r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
+                               false, &root);
        if (r)
                goto error_free_delayed;
 
@@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
                            uint64_t addr)
 {
+       bool is_compute_context = false;
        struct amdgpu_bo *root;
        uint64_t value, flags;
        struct amdgpu_vm *vm;
@@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 
        spin_lock(&adev->vm_manager.pasid_lock);
        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-       if (vm)
+       if (vm) {
                root = amdgpu_bo_ref(vm->root.base.bo);
-       else
+               is_compute_context = vm->is_compute_context;
+       } else {
                root = NULL;
+       }
        spin_unlock(&adev->vm_manager.pasid_lock);
 
        if (!root)
                return false;
 
+       addr /= AMDGPU_GPU_PAGE_SIZE;
+
+       if (is_compute_context &&
+           !svm_range_restore_pages(adev, pasid, addr)) {
+               amdgpu_bo_unref(&root);
+               return true;
+       }
+
        r = amdgpu_bo_reserve(root, true);
        if (r)
                goto error_unref;
@@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
        if (!vm)
                goto error_unlock;
 
-       addr /= AMDGPU_GPU_PAGE_SIZE;
        flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
                AMDGPU_PTE_SYSTEM;
 
-       if (vm->is_compute_context) {
+       if (is_compute_context) {
                /* Intentionally setting invalid PTE flag
                 * combination to force a no-retry-fault
                 */
                flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
                        AMDGPU_PTE_TF;
                value = 0;
-
        } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
                /* Redirect the access to the dummy page */
                value = adev->dummy_page_addr;
index 6fd7dad0540a7d412779bb16fc01d8e061ebb460..ea60ec122b5117acd3c1035119b9f0e6865e918e 100644 (file)
@@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry;
 /* max vmids dedicated for process */
 #define AMDGPU_VM_MAX_RESERVED_VMID    1
 
-#define AMDGPU_VM_CONTEXT_GFX 0
-#define AMDGPU_VM_CONTEXT_COMPUTE 1
-
 /* See vm_update_mode */
 #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
 #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
@@ -367,6 +364,8 @@ struct amdgpu_vm_manager {
        spinlock_t                              pasid_lock;
 };
 
+struct amdgpu_bo_va_mapping;
+
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                  int vm_context, u32 pasid);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                          struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                           struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+                               struct amdgpu_device *bo_adev,
+                               struct amdgpu_vm *vm, bool immediate,
+                               bool unlocked, struct dma_resv *resv,
+                               uint64_t start, uint64_t last,
+                               uint64_t flags, uint64_t offset,
+                               struct drm_mm_node *nodes,
+                               dma_addr_t *pages_addr,
+                               struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
                        bool clear);
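amdgpu_vm_bo_update_mapping() is now exported so code outside amdgpu_vm.c (the HMM SVM work in amdkfd) can program page tables directly. Below is a hedged sketch of a system-memory mapping call; the wrapper name, the flag choice, and the NULL resv/nodes arguments are assumptions for illustration, not taken from this patch.

/* Hypothetical caller mapping npages of system memory (described by
 * pages_addr[]) into a VM; start/last are in GPU page units.
 */
static int example_map_system_pages(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm,
                                    uint64_t start, uint64_t npages,
                                    dma_addr_t *pages_addr,
                                    struct dma_fence **fence)
{
        uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM |
                         AMDGPU_PTE_SNOOPED;

        return amdgpu_vm_bo_update_mapping(adev, adev, vm,
                                           false /* immediate */,
                                           true  /* unlocked */,
                                           NULL  /* resv */,
                                           start, start + npages - 1,
                                           flags, 0 /* offset */,
                                           NULL  /* no VRAM nodes */,
                                           pages_addr, fence);
}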
index f7235438535f5ac00bce0ee859ab96088598a39a..f78d21910e07992dc6e7bd7e58473a98be6d5cbc 100644 (file)
 #include "amdgpu_atomfirmware.h"
 #include "atom.h"
 
-static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
 {
        return container_of(man, struct amdgpu_vram_mgr, manager);
 }
 
-static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
+static inline struct amdgpu_device *
+to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
 {
        return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
 }
@@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
  * amount of currently used VRAM in bytes
  */
 static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+                                             struct device_attribute *attr,
+                                             char *buf)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
-       struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+       struct ttm_resource_manager *man;
 
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
        return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
 }
 
@@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
  * amount of currently used visible VRAM in bytes
  */
 static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+                                                 struct device_attribute *attr,
+                                                 char *buf)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
-       struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+       struct ttm_resource_manager *man;
 
+       man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
        return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
 }
 
+/**
+ * DOC: mem_info_vram_vendor
+ *
+ * The amdgpu driver provides a sysfs API for reporting the vendor of the
+ * installed VRAM
+ * The file mem_info_vram_vendor is used for this and returns the name of the
+ * vendor.
+ */
 static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
-                                                struct device_attribute *attr,
-                                                char *buf)
+                                          struct device_attribute *attr,
+                                          char *buf)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
@@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
        NULL
 };
 
-static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
-
-/**
- * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate and initialize the VRAM manager.
- */
-int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
-{
-       struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
-       struct ttm_resource_manager *man = &mgr->manager;
-       int ret;
-
-       ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
-
-       man->func = &amdgpu_vram_mgr_func;
-
-       drm_mm_init(&mgr->mm, 0, man->size);
-       spin_lock_init(&mgr->lock);
-       INIT_LIST_HEAD(&mgr->reservations_pending);
-       INIT_LIST_HEAD(&mgr->reserved_pages);
-
-       /* Add the two VRAM-related sysfs files */
-       ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
-       if (ret)
-               DRM_ERROR("Failed to register sysfs\n");
-
-       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
-       ttm_resource_manager_set_used(man, true);
-       return 0;
-}
-
-/**
- * amdgpu_vram_mgr_fini - free and destroy VRAM manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
-{
-       struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
-       struct ttm_resource_manager *man = &mgr->manager;
-       int ret;
-       struct amdgpu_vram_reservation *rsv, *temp;
-
-       ttm_resource_manager_set_used(man, false);
-
-       ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
-       if (ret)
-               return;
-
-       spin_lock(&mgr->lock);
-       list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
-               kfree(rsv);
-
-       list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
-               drm_mm_remove_node(&rsv->mm_node);
-               kfree(rsv);
-       }
-       drm_mm_takedown(&mgr->mm);
-       spin_unlock(&mgr->lock);
-
-       sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
-
-       ttm_resource_manager_cleanup(man);
-       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
-}
-
 /**
  * amdgpu_vram_mgr_vis_size - Calculate visible node size
  *
@@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
        return usage;
 }
 
+/* Commit the reservation of VRAM pages */
 static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
 {
        struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
@@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
                               const struct ttm_place *place,
                               struct ttm_resource *mem)
 {
+       unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
        struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
        struct amdgpu_device *adev = to_amdgpu_device(mgr);
+       uint64_t vis_usage = 0, mem_bytes, max_bytes;
        struct drm_mm *mm = &mgr->mm;
-       struct drm_mm_node *nodes;
        enum drm_mm_insert_mode mode;
-       unsigned long lpfn, num_nodes, pages_per_node, pages_left;
-       uint64_t vis_usage = 0, mem_bytes, max_bytes;
+       struct drm_mm_node *nodes;
        unsigned i;
        int r;
 
@@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
                pages_per_node = HPAGE_PMD_NR;
 #else
                /* default to 2MB */
-               pages_per_node = (2UL << (20UL - PAGE_SHIFT));
+               pages_per_node = 2UL << (20UL - PAGE_SHIFT);
 #endif
-               pages_per_node = max((uint32_t)pages_per_node,
-                                    tbo->page_alignment);
+               pages_per_node = max_t(uint32_t, pages_per_node,
+                                      tbo->page_alignment);
                num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
        }
 
@@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
        mem->start = 0;
        pages_left = mem->num_pages;
 
-       spin_lock(&mgr->lock);
-       for (i = 0; pages_left >= pages_per_node; ++i) {
-               unsigned long pages = rounddown_pow_of_two(pages_left);
+       /* Limit maximum size to 2GB due to SG table limitations */
+       pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
 
-               /* Limit maximum size to 2GB due to SG table limitations */
-               pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
-
-               r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
-                                               pages_per_node, 0,
-                                               place->fpfn, lpfn,
-                                               mode);
-               if (unlikely(r))
-                       break;
-
-               vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
-               amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
-               pages_left -= pages;
-       }
-
-       for (; pages_left; ++i) {
-               unsigned long pages = min(pages_left, pages_per_node);
+       i = 0;
+       spin_lock(&mgr->lock);
+       while (pages_left) {
                uint32_t alignment = tbo->page_alignment;
 
-               if (pages == pages_per_node)
+               if (pages >= pages_per_node)
                        alignment = pages_per_node;
 
-               r = drm_mm_insert_node_in_range(mm, &nodes[i],
-                                               pages, alignment, 0,
-                                               place->fpfn, lpfn,
-                                               mode);
-               if (unlikely(r))
+               r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
+                                               0, place->fpfn, lpfn, mode);
+               if (unlikely(r)) {
+                       if (pages > pages_per_node) {
+                               if (is_power_of_2(pages))
+                                       pages = pages / 2;
+                               else
+                                       pages = rounddown_pow_of_two(pages);
+                               continue;
+                       }
                        goto error;
+               }
 
                vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
                amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
                pages_left -= pages;
+               ++i;
+
+               if (pages > pages_left)
+                       pages = pages_left;
        }
        spin_unlock(&mgr->lock);
 
@@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
        .free   = amdgpu_vram_mgr_del,
        .debug  = amdgpu_vram_mgr_debug
 };
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+{
+       struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+       struct ttm_resource_manager *man = &mgr->manager;
+       int ret;
+
+       ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
+
+       man->func = &amdgpu_vram_mgr_func;
+
+       drm_mm_init(&mgr->mm, 0, man->size);
+       spin_lock_init(&mgr->lock);
+       INIT_LIST_HEAD(&mgr->reservations_pending);
+       INIT_LIST_HEAD(&mgr->reserved_pages);
+
+       /* Add the two VRAM-related sysfs files */
+       ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
+       if (ret)
+               DRM_ERROR("Failed to register sysfs\n");
+
+       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
+       ttm_resource_manager_set_used(man, true);
+       return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
+{
+       struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+       struct ttm_resource_manager *man = &mgr->manager;
+       int ret;
+       struct amdgpu_vram_reservation *rsv, *temp;
+
+       ttm_resource_manager_set_used(man, false);
+
+       ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+       if (ret)
+               return;
+
+       spin_lock(&mgr->lock);
+       list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+               kfree(rsv);
+
+       list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+               drm_mm_remove_node(&rsv->mm_node);
+               kfree(rsv);
+       }
+       drm_mm_takedown(&mgr->mm);
+       spin_unlock(&mgr->lock);
+
+       sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
+
+       ttm_resource_manager_cleanup(man);
+       ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+}
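For readers following the reworked allocation loop in amdgpu_vram_mgr_new() above, here is a minimal, self-contained sketch of just the chunk-splitting strategy: start with the largest chunk allowed by the 2GB SG-table cap and, whenever an insertion fails, shrink the chunk toward the per-node granularity before giving up. alloc_in_chunks() and try_insert() are hypothetical stand-ins for illustration, not driver API, and the numbers are arbitrary.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for drm_mm_insert_node_in_range(): pretend that
     * only holes of up to 512 pages are still free. */
    static bool try_insert(unsigned long pages)
    {
    	return pages <= 512;
    }

    static bool is_pow2(unsigned long v)
    {
    	return v && !(v & (v - 1));
    }

    static unsigned long rounddown_pow2(unsigned long v)
    {
    	unsigned long p = 1;

    	while (p * 2 <= v)
    		p *= 2;
    	return p;
    }

    /* Mirror of the splitting strategy used by the reworked loop above. */
    static int alloc_in_chunks(unsigned long pages_left, unsigned long pages_per_node)
    {
    	unsigned long pages = pages_left < 2048 ? pages_left : 2048; /* SG-table style cap */

    	while (pages_left) {
    		if (!try_insert(pages)) {
    			if (pages > pages_per_node) {
    				/* shrink the chunk and retry, as the patch does */
    				pages = is_pow2(pages) ? pages / 2 : rounddown_pow2(pages);
    				continue;
    			}
    			return -1; /* even the minimum chunk failed */
    		}
    		printf("placed a chunk of %lu pages\n", pages);
    		pages_left -= pages;
    		if (pages > pages_left)
    			pages = pages_left;
    	}
    	return 0;
    }

    int main(void)
    {
    	return alloc_in_chunks(3000, 512);
    }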
index 1a8f6d4baab24ff3107155822f9d6320cadab652..befd0b4b7bea0059a5474fde08ef3ab4f1cab2c6 100644 (file)
@@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags {
 
 union amd_sriov_reg_access_flags {
        struct {
-               uint32_t vf_reg_access_ih    : 1;
-               uint32_t vf_reg_access_mmhub : 1;
-               uint32_t vf_reg_access_gc    : 1;
+               uint32_t vf_reg_psp_access_ih    : 1;
+               uint32_t vf_reg_rlc_access_mmhub : 1;
+               uint32_t vf_reg_rlc_access_gc    : 1;
                uint32_t reserved            : 29;
        } flags;
        uint32_t all;
index 5c11144da0513dc4a92064e6d50e3ad8bb7d95b2..33324427b555e67c4b72c89d930ae754678a0a74 100644 (file)
@@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle)
 static int dce_virtual_sw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int i = 0;
+
+       for (i = 0; i < adev->mode_info.num_crtc; i++)
+               if (adev->mode_info.crtcs[i])
+                       hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
 
        kfree(adev->mode_info.bios_hardcoded_edid);
 
@@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle)
 
 static int dce_virtual_hw_fini(void *handle)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       int i = 0;
-
-       for (i = 0; i<adev->mode_info.num_crtc; i++)
-               if (adev->mode_info.crtcs[i])
-                       hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
-
        return 0;
 }
 
index 0d8459d63bac172a3710f6448e233657c2676fe7..36ba229576d83d147ae5935cfdffcd333e2865ad 100644 (file)
@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
        adev->df.hash_status.hash_2m = false;
        adev->df.hash_status.hash_1g = false;
 
-       if (adev->asic_type != CHIP_ARCTURUS)
-               return;
-
-       /* encoding for hash-enabled on Arcturus */
-       if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+       /* encoding for hash-enabled on Arcturus and Aldebaran */
+       if ((adev->asic_type == CHIP_ARCTURUS &&
+            adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+            (adev->asic_type == CHIP_ALDEBARAN &&
+             adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
                tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
                adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
                                                DF_CS_UMC_AON0_DfGlobalCtrl,
@@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
        u32 tmp;
 
        tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
-       tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+       if (adev->asic_type == CHIP_ALDEBARAN)
+               tmp &=
+               ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+       else
+               tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+
        tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 
        return tmp;
index a078a38c2ceeaaf6ff97a3559e22c9dc683c3c91..22608c45f07c3fad915b298e8eeb3cfda4b4859c 100644 (file)
@@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
 {
        u32 tmp;
 
-       if (adev->asic_type != CHIP_ARCTURUS)
+       if (adev->asic_type != CHIP_ARCTURUS &&
+           adev->asic_type != CHIP_ALDEBARAN)
                return;
 
        tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
        if (!ring->sched.ready)
                return 0;
 
-       if (adev->asic_type == CHIP_ARCTURUS ||
-           adev->asic_type == CHIP_ALDEBARAN) {
+       if (adev->asic_type == CHIP_ARCTURUS) {
                vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
                vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
                vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
        }
 
        /* requires IBs so do in late init after IB pool is initialized */
-       r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+       if (adev->asic_type == CHIP_ALDEBARAN)
+               r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+       else
+               r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
        if (r)
                return r;
 
index a30c7c10cd9a76433de6b05d8db2ee3f5c7f00cb..dbad9ef002d594b4e15df2cf237287c839b8c78f 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include "amdgpu.h"
 #include "soc15.h"
+#include "soc15d.h"
 
 #include "gc/gc_9_4_2_offset.h"
 #include "gc/gc_9_4_2_sh_mask.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_gfx.h"
 
+#define SE_ID_MAX 8
+#define CU_ID_MAX 16
+#define SIMD_ID_MAX 4
+#define WAVE_ID_MAX 10
+
 enum gfx_v9_4_2_utc_type {
        VML2_MEM,
        VML2_WALKER_MEM,
@@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
        SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 };
 
+/*
+ * This shader is used to clear VGPRs and LDS, and to write the input
+ * pattern into the write-back buffer, which the driver then uses to
+ * check whether all SIMDs have been covered.
+ */
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+       0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+       0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+       0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
+       0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
+       0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
+       0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
+       0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
+       0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
+       0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
+       0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
+       0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
+       0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
+       0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
+       0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
+       0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
+       0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
+       0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
+       0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
+       0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
+       0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
+       0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
+       0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
+       0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
+       0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
+       0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
+       0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
+       0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
+       0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
+       0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
+       0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
+       0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
+       0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
+       0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
+       0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
+       0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
+       0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
+       0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
+       0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
+       0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
+       0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
+       0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
+       0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
+       0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
+       0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
+       0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
+       0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
+       0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
+       0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
+       0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
+       0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
+       0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
+       0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
+       0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
+       0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
+       0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
+       0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
+       0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
+       0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
+       0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
+       0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
+       0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
+       0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
+       0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
+       0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
+       0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
+       0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
+       0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
+       0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
+       0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
+       0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
+       0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
+       0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
+       0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
+       0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
+       0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
+       0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
+       0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
+       0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
+       0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
+       0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
+       0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
+       0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
+       0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
+       0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
+       0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
+       0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
+       0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
+       0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
+       0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
+       0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
+       0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
+       0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
+       0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
+       0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
+       0xbf810000,
+};
+
+const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 },  /* 64KB LDS */
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /*  63 - accum-offset = 256 */
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * The shaders below are used to clear SGPRs and to write the input
+ * pattern into the write-back buffer. The first two dispatches must be
+ * scheduled simultaneously so that all SGPRs can be allocated; dispatch 1
+ * therefore checks the write-back buffer before it is scheduled, to make
+ * sure the waves of dispatch 0 have been spread evenly across all SIMDs.
+ * Both dispatch 0 and dispatch 1 are halted until all waves are
+ * dispatched, and the driver then writes a pattern to the shared memory
+ * to let all waves continue.
+ */
+static const u32 sgpr112_init_compute_shader_aldebaran[] = {
+       0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+       0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+       0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+       0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+       0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+       0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+       0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+       0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+       0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+       0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+       0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+       0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+       0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+       0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+       0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+       0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+       0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+       0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+       0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+       0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+       0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
+       0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
+       0xbf810000
+};
+
+const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const u32 sgpr96_init_compute_shader_aldebaran[] = {
+       0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+       0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+       0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+       0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+       0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+       0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+       0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+       0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+       0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+       0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+       0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+       0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+       0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+       0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+       0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+       0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+       0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+       0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+       0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+       0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+       0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * This shader is used to clear the SGPRs left uninitialized by the above
+ * two dispatches; due to a hardware limitation, dispatch 0 cannot clear
+ * the top-hole SGPRs. Therefore 4 waves per SIMD are needed to cover them.
+ */
+static const u32 sgpr64_init_compute_shader_aldebaran[] = {
+       0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+       0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+       0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+       0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+       0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+       0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+       0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+       0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+       0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+       0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+       0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+       0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+       0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+       0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+       0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+       { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
+                                struct amdgpu_ring *ring,
+                                struct amdgpu_ib *ib,
+                                const u32 *shader_ptr, u32 shader_size,
+                                const struct soc15_reg_entry *init_regs, u32 regs_size,
+                                u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
+                                struct dma_fence **fence_ptr)
+{
+       int r, i;
+       uint32_t total_size, shader_offset;
+       u64 gpu_addr;
+
+       total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
+       total_size = ALIGN(total_size, 256);
+       shader_offset = total_size;
+       total_size += ALIGN(shader_size, 256);
+
+       /* allocate an indirect buffer to put the commands in */
+       memset(ib, 0, sizeof(*ib));
+       r = amdgpu_ib_get(adev, NULL, total_size,
+                                       AMDGPU_IB_POOL_DIRECT, ib);
+       if (r) {
+               dev_err(adev->dev, "failed to get ib (%d).\n", r);
+               return r;
+       }
+
+       /* load the compute shaders */
+       for (i = 0; i < shader_size/sizeof(u32); i++)
+               ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
+
+       /* init the ib length to 0 */
+       ib->length_dw = 0;
+
+       /* write the register state for the compute dispatch */
+       for (i = 0; i < regs_size; i++) {
+               ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+               ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
+                                                               - PACKET3_SET_SH_REG_START;
+               ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
+       }
+
+       /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+       gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
+       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+       ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
+                                                       - PACKET3_SET_SH_REG_START;
+       ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
+       ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
+
+       /* write the wb buffer address */
+       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
+       ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
+                                                       - PACKET3_SET_SH_REG_START;
+       ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
+       ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
+       ib->ptr[ib->length_dw++] = pattern;
+
+       /* write dispatch packet */
+       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+       ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
+       ib->ptr[ib->length_dw++] = 1; /* y */
+       ib->ptr[ib->length_dw++] = 1; /* z */
+       ib->ptr[ib->length_dw++] =
+               REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+       /* schedule the IB on the ring */
+       r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
+       if (r) {
+               dev_err(adev->dev, "ib submit failed (%d).\n", r);
+               amdgpu_ib_free(adev, ib, NULL);
+       }
+       return r;
+}
+
+static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
+{
+       uint32_t se, cu, simd, wave;
+       uint32_t offset = 0;
+       char *str;
+       int size;
+
+       str = kmalloc(256, GFP_KERNEL);
+       if (!str)
+               return;
+
+       dev_dbg(adev->dev, "wave assignment:\n");
+
+       for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
+               for (cu = 0; cu < CU_ID_MAX; cu++) {
+                       memset(str, 0, 256);
+                       size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
+                       for (simd = 0; simd < SIMD_ID_MAX; simd++) {
+                               size += sprintf(str + size, "[");
+                               for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+                                       size += sprintf(str + size, "%x", wb_ptr[offset]);
+                                       offset++;
+                               }
+                               size += sprintf(str + size, "]  ");
+                       }
+                       dev_dbg(adev->dev, "%s\n", str);
+               }
+       }
+
+       kfree(str);
+}
+
+static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
+                                             uint32_t *wb_ptr, uint32_t mask,
+                                             uint32_t pattern, uint32_t num_wave, bool wait)
+{
+       uint32_t se, cu, simd, wave;
+       uint32_t loop = 0;
+       uint32_t wave_cnt;
+       uint32_t offset;
+
+       do {
+               wave_cnt = 0;
+               offset = 0;
+
+               for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
+                       for (cu = 0; cu < CU_ID_MAX; cu++)
+                               for (simd = 0; simd < SIMD_ID_MAX; simd++)
+                                       for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+                                               if (((1 << wave) & mask) &&
+                                                   (wb_ptr[offset] == pattern))
+                                                       wave_cnt++;
+
+                                               offset++;
+                                       }
+
+               if (wave_cnt == num_wave)
+                       return 0;
+
+               mdelay(1);
+       } while (++loop < 2000 && wait);
+
+       dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
+               wave_cnt, num_wave);
+
+       gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
+
+       return -EBADSLT;
+}
+
+static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
+{
+       int r;
+       int wb_size = adev->gfx.config.max_shader_engines *
+                        CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+       struct amdgpu_ib wb_ib;
+       struct amdgpu_ib disp_ibs[3];
+       struct dma_fence *fences[3];
+       u32 pattern[3] = { 0x1, 0x5, 0xa };
+
+       /* bail if the compute rings are not ready */
+       if (!adev->gfx.compute_ring[0].sched.ready ||
+                !adev->gfx.compute_ring[1].sched.ready)
+               return 0;
+
+       /* allocate the write-back buffer from IB */
+       memset(&wb_ib, 0, sizeof(wb_ib));
+       r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+                         AMDGPU_IB_POOL_DIRECT, &wb_ib);
+       if (r) {
+               dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
+               return r;
+       }
+       memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+       r = gfx_v9_4_2_run_shader(adev,
+                       &adev->gfx.compute_ring[0],
+                       &disp_ibs[0],
+                       sgpr112_init_compute_shader_aldebaran,
+                       sizeof(sgpr112_init_compute_shader_aldebaran),
+                       sgpr112_init_regs_aldebaran,
+                       ARRAY_SIZE(sgpr112_init_regs_aldebaran),
+                       adev->gfx.cu_info.number,
+                       wb_ib.gpu_addr, pattern[0], &fences[0]);
+       if (r) {
+               dev_err(adev->dev, "failed to clear first 224 sgprs\n");
+               goto pro_end;
+       }
+
+       r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+                       &wb_ib.ptr[1], 0b11,
+                       pattern[0],
+                       adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
+                       true);
+       if (r) {
+               dev_err(adev->dev, "wave coverage failed when clearing first 224 sgprs\n");
+               wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+               goto disp0_failed;
+       }
+
+       r = gfx_v9_4_2_run_shader(adev,
+                       &adev->gfx.compute_ring[1],
+                       &disp_ibs[1],
+                       sgpr96_init_compute_shader_aldebaran,
+                       sizeof(sgpr96_init_compute_shader_aldebaran),
+                       sgpr96_init_regs_aldebaran,
+                       ARRAY_SIZE(sgpr96_init_regs_aldebaran),
+                       adev->gfx.cu_info.number * 2,
+                       wb_ib.gpu_addr, pattern[1], &fences[1]);
+       if (r) {
+               dev_err(adev->dev, "failed to clear next 576 sgprs\n");
+               goto disp0_failed;
+       }
+
+       r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+                       &wb_ib.ptr[1], 0b11111100,
+                       pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
+                       true);
+       if (r) {
+               dev_err(adev->dev, "wave coverage failed when clearing first 576 sgprs\n");
+               wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+               goto disp1_failed;
+       }
+
+       wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+       /* wait for the GPU to finish processing the IB */
+       r = dma_fence_wait(fences[0], false);
+       if (r) {
+               dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
+               goto disp1_failed;
+       }
+
+       r = dma_fence_wait(fences[1], false);
+       if (r) {
+               dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
+               goto disp1_failed;
+       }
+
+       memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+       r = gfx_v9_4_2_run_shader(adev,
+                       &adev->gfx.compute_ring[0],
+                       &disp_ibs[2],
+                       sgpr64_init_compute_shader_aldebaran,
+                       sizeof(sgpr64_init_compute_shader_aldebaran),
+                       sgpr64_init_regs_aldebaran,
+                       ARRAY_SIZE(sgpr64_init_regs_aldebaran),
+                       adev->gfx.cu_info.number,
+                       wb_ib.gpu_addr, pattern[2], &fences[2]);
+       if (r) {
+               dev_err(adev->dev, "failed to clear first 256 sgprs\n");
+               goto disp1_failed;
+       }
+
+       r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+                       &wb_ib.ptr[1], 0b1111,
+                       pattern[2],
+                       adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
+                       true);
+       if (r) {
+               dev_err(adev->dev, "wave coverage failed when clearing first 256 sgprs\n");
+               wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+               goto disp2_failed;
+       }
+
+       wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+       r = dma_fence_wait(fences[2], false);
+       if (r) {
+               dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
+               goto disp2_failed;
+       }
+
+disp2_failed:
+       amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+       dma_fence_put(fences[2]);
+disp1_failed:
+       amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+       dma_fence_put(fences[1]);
+disp0_failed:
+       amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+       dma_fence_put(fences[0]);
+pro_end:
+       amdgpu_ib_free(adev, &wb_ib, NULL);
+
+       if (r)
+               dev_info(adev->dev, "Init SGPRS Failed\n");
+       else
+               dev_info(adev->dev, "Init SGPRS Successfully\n");
+
+       return r;
+}
+
+static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
+{
+       int r;
+       /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0~9 */
+       int wb_size = adev->gfx.config.max_shader_engines *
+                        CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+       struct amdgpu_ib wb_ib;
+       struct amdgpu_ib disp_ib;
+       struct dma_fence *fence;
+       u32 pattern = 0xa;
+
+       /* bail if the compute ring is not ready */
+       if (!adev->gfx.compute_ring[0].sched.ready)
+               return 0;
+
+       /* allocate the write-back buffer from IB */
+       memset(&wb_ib, 0, sizeof(wb_ib));
+       r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+                         AMDGPU_IB_POOL_DIRECT, &wb_ib);
+       if (r) {
+               dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
+               return r;
+       }
+       memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+       r = gfx_v9_4_2_run_shader(adev,
+                       &adev->gfx.compute_ring[0],
+                       &disp_ib,
+                       vgpr_init_compute_shader_aldebaran,
+                       sizeof(vgpr_init_compute_shader_aldebaran),
+                       vgpr_init_regs_aldebaran,
+                       ARRAY_SIZE(vgpr_init_regs_aldebaran),
+                       adev->gfx.cu_info.number,
+                       wb_ib.gpu_addr, pattern, &fence);
+       if (r) {
+               dev_err(adev->dev, "failed to clear vgprs\n");
+               goto pro_end;
+       }
+
+       /* wait for the GPU to finish processing the IB */
+       r = dma_fence_wait(fence, false);
+       if (r) {
+               dev_err(adev->dev, "timeout to clear vgprs\n");
+               goto disp_failed;
+       }
+
+       r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+                       &wb_ib.ptr[1], 0b1,
+                       pattern,
+                       adev->gfx.cu_info.number * SIMD_ID_MAX,
+                       false);
+       if (r) {
+               dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
+               goto disp_failed;
+       }
+
+disp_failed:
+       amdgpu_ib_free(adev, &disp_ib, NULL);
+       dma_fence_put(fence);
+pro_end:
+       amdgpu_ib_free(adev, &wb_ib, NULL);
+
+       if (r)
+               dev_info(adev->dev, "Init VGPRS Failed\n");
+       else
+               dev_info(adev->dev, "Init VGPRS Successfully\n");
+
+       return r;
+}
+
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+       /* only support when RAS is enabled */
+       /* only supported when RAS is enabled */
+               return 0;
+
+       gfx_v9_4_2_do_sgprs_init(adev);
+
+       gfx_v9_4_2_do_vgprs_init(adev);
+
+       return 0;
+}
+
 static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
 static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
 
@@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
          REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
 };
 
-static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
-       { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
+       SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
 
 static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
                                          const struct soc15_reg_entry *reg,
@@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
 static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
 {
        uint32_t i, j;
+       uint32_t value;
+
+       value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
 
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
                for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
                     j++) {
                        gfx_v9_4_2_select_se_sh(adev, i, 0, j);
-                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
+                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
                }
        }
        gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
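As an aside on the wave-coverage bookkeeping above: the write-back buffer filled in by these shaders is a flat array indexed per SE, CU, SIMD and wave, in exactly the order the nested loops of gfx_v9_4_2_wait_for_waves_assigned() walk it. A tiny sketch of that mapping follows; wb_offset() is a hypothetical helper for illustration, not part of the patch, and assumes the CU_ID_MAX/SIMD_ID_MAX/WAVE_ID_MAX limits defined earlier in this file.

    /* Hypothetical helper, for illustration only: the flat offset implied
     * by the nested se/cu/simd/wave loops above. */
    static inline uint32_t wb_offset(uint32_t se, uint32_t cu,
    				 uint32_t simd, uint32_t wave)
    {
    	return ((se * CU_ID_MAX + cu) * SIMD_ID_MAX + simd) * WAVE_ID_MAX + wave;
    }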
index 81c5833b6b9f5e9f4b9b278034b1106736addbf2..6db1f88509afd4566601c67e0ed84bf99487368b 100644 (file)
@@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
 void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
                                      uint32_t die_id);
 void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
 
 extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;
 
index 1e4678cb98f01fe18713c0779808b4475bd37a0d..a03fdd41212b0b980796d79c039c418b78cf3f45 100644 (file)
@@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    PAGE_TABLE_BLOCK_SIZE,
                                    block_size);
-               /* Send no-retry XNACK on fault to suppress VM fault storm. */
+               /* Send no-retry XNACK on fault to suppress VM fault storm.
+                * On Aldebaran, XNACK can be enabled in the SQ per-process.
+                * Retry faults need to be enabled for that to work.
+                */
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-                                   !adev->gmc.noretry);
+                                   !adev->gmc.noretry ||
+                                   adev->asic_type == CHIP_ALDEBARAN);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
                                    i * hub->ctx_distance, tmp);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
index 455bb91060d0bc571150fa20afa6a395e3345750..093ab98c31bb508a9b452af5441539dab72798e7 100644 (file)
@@ -53,6 +53,7 @@
 #include "mmhub_v1_7.h"
 #include "umc_v6_1.h"
 #include "umc_v6_0.h"
+#include "hdp_v4_0.h"
 
 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
 
@@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
        adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
 }
 
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
+{
+       adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+}
+
 static int gmc_v9_0_early_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
        gmc_v9_0_set_mmhub_funcs(adev);
        gmc_v9_0_set_mmhub_ras_funcs(adev);
        gmc_v9_0_set_gfxhub_funcs(adev);
+       gmc_v9_0_set_hdp_ras_funcs(adev);
 
        adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
        adev->gmc.shared_aperture_end =
@@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle)
         * writes, while disables HBM ECC for vega10.
         */
        if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
-               if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
+               if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
                        if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
                                adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
                }
@@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
            adev->mmhub.ras_funcs->reset_ras_error_count)
                adev->mmhub.ras_funcs->reset_ras_error_count(adev);
 
+       if (adev->hdp.ras_funcs &&
+           adev->hdp.ras_funcs->reset_ras_error_count)
+               adev->hdp.ras_funcs->reset_ras_error_count(adev);
+
        r = amdgpu_gmc_ras_late_init(adev);
        if (r)
                return r;
index edbd35d293ebfe83d766de96f2343ddd7d6fc62e..74b90cc2bf48048e7f0f0e02797cdf73f0967fcb 100644 (file)
@@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
                        HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
 }
 
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+                                          void *ras_error_status)
+{
+       struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+       err_data->ue_count = 0;
+       err_data->ce_count = 0;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+               return;
+
+       /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+       err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+}
+
 static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
 {
        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
                return;
-       /*read back hdp ras counter to reset it to 0 */
-       RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+
+       if (adev->asic_type >= CHIP_ALDEBARAN)
+               WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+       else
+               /* read back the HDP RAS counter to reset it to 0 */
+               RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
 }
 
 static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
@@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
        WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
 }
 
+const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
+       .ras_late_init = amdgpu_hdp_ras_late_init,
+       .ras_fini = amdgpu_hdp_ras_fini,
+       .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+       .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+};
+
 const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
        .flush_hdp = hdp_v4_0_flush_hdp,
        .invalidate_hdp = hdp_v4_0_invalidate_hdp,
-       .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
        .update_clock_gating = hdp_v4_0_update_clock_gating,
        .get_clock_gating_state = hdp_v4_0_get_clockgating_state,
        .init_registers = hdp_v4_0_init_registers,
index d1e6399e8c46ef9e86cf9e521f11bbdf73474a26..dc3a1b81dd62bbccd33adea191bec8da235391b6 100644 (file)
@@ -27,5 +27,6 @@
 #include "soc15_common.h"
 
 extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
 
 #endif
index 0103a5ab28e60d9c57372f1d5bf57f593ac1d670..6264934b67ac00b612cc74677d44818fb13d83b3 100644 (file)
@@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    PAGE_TABLE_BLOCK_SIZE,
                                    block_size);
-               /* Send no-retry XNACK on fault to suppress VM fault storm. */
+               /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+                * Retry faults need to be enabled for that to work.
+                */
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-                                   !adev->gmc.noretry);
+                                   1);
                WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
                                    i * hub->ctx_distance, tmp);
                WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
@@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
        }
 }
 
+static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
+{
+       int i;
+       uint32_t reg_value;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+               reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+                       mmhub_v1_7_ea_err_status_regs[i]));
+               reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+                                         CLEAR_ERROR_STATUS, 0x01);
+               WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
+                      reg_value);
+       }
+}
+
 const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
        .ras_late_init = amdgpu_mmhub_ras_late_init,
        .ras_fini = amdgpu_mmhub_ras_fini,
        .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
        .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
        .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+       .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
 };
 
 const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
index a9899335d0b1fb0fea8e6168d85088c53e99d1d7..709ac576ac7e892c665c0db8195c6af943cd37ba 100644 (file)
@@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
                return 0;
 
        mmhub_v2_3_update_medium_grain_clock_gating(adev,
-                       state == AMD_CG_STATE_GATE ? true : false);
+                               state == AMD_CG_STATE_GATE);
        mmhub_v2_3_update_medium_grain_light_sleep(adev,
-                       state == AMD_CG_STATE_GATE ? true : false);
+                               state == AMD_CG_STATE_GATE);
 
        return 0;
 }
index d290ca0b06da87dcf380a7965065ca7ae99d1914..75d1f9b939b2274231fff343adb201652c65754a 100644 (file)
@@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
 
 static void nv_program_aspm(struct amdgpu_device *adev)
 {
-       if (amdgpu_aspm != 1)
+       if (!amdgpu_aspm)
                return;
 
        if (!(adev->flags & AMD_IS_APU) &&
@@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle)
        case CHIP_SIENNA_CICHLID:
                adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
                        AMD_CG_SUPPORT_GFX_CGCG |
+                       AMD_CG_SUPPORT_GFX_CGLS |
                        AMD_CG_SUPPORT_GFX_3D_CGCG |
                        AMD_CG_SUPPORT_MC_MGCG |
                        AMD_CG_SUPPORT_VCN_MGCG |
@@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle)
        case CHIP_NAVY_FLOUNDER:
                adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
                        AMD_CG_SUPPORT_GFX_CGCG |
+                       AMD_CG_SUPPORT_GFX_CGLS |
                        AMD_CG_SUPPORT_GFX_3D_CGCG |
                        AMD_CG_SUPPORT_VCN_MGCG |
                        AMD_CG_SUPPORT_JPEG_MGCG |
@@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle)
                        AMD_CG_SUPPORT_MC_LS |
                        AMD_CG_SUPPORT_GFX_FGCG |
                        AMD_CG_SUPPORT_VCN_MGCG |
+                       AMD_CG_SUPPORT_SDMA_MGCG |
+                       AMD_CG_SUPPORT_SDMA_LS |
                        AMD_CG_SUPPORT_JPEG_MGCG;
                adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
                        AMD_PG_SUPPORT_VCN |
@@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle)
        case CHIP_DIMGREY_CAVEFISH:
                adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
                        AMD_CG_SUPPORT_GFX_CGCG |
+                       AMD_CG_SUPPORT_GFX_CGLS |
                        AMD_CG_SUPPORT_GFX_3D_CGCG |
                        AMD_CG_SUPPORT_VCN_MGCG |
                        AMD_CG_SUPPORT_JPEG_MGCG |
index 96064c34316341e25dece6f51a1fd8c89305d4c2..f6d3180febc457f73f24afa08ffc5cd8b418ae01 100644 (file)
@@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
     GFX_CMD_ID_SETUP_VMR          = 0x00000009,   /* setup VMR region */
     GFX_CMD_ID_DESTROY_VMR        = 0x0000000A,   /* destroy VMR region */
     GFX_CMD_ID_PROG_REG           = 0x0000000B,   /* program regs */
-    GFX_CMD_ID_CLEAR_VF_FW        = 0x0000000D,   /* Clear VF FW, to be used on VF shutdown. */
     GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F,   /* Query GPUVA of the Fw Attestation DB */
     /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
     GFX_CMD_ID_LOAD_TOC           = 0x00000020,   /* Load TOC and obtain TMR size */
index 5715be6770ecc0314e0c40d71b9412a7771d474a..d197185f778903182c2ec8c0750cb271ab8a92a1 100644 (file)
@@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
                if (adev->asic_type == CHIP_ARCTURUS &&
                    adev->sdma.instance[i].fw_version >= 14)
                        WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
+               /* Extend page fault timeout to avoid interrupt storm */
+               WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
        }
 
 }
@@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
        memset(&task_info, 0, sizeof(struct amdgpu_task_info));
        amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
 
-       dev_info(adev->dev,
+       dev_dbg_ratelimited(adev->dev,
                   "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
                   "pasid:%u, for process %s pid %d thread %s pid %d\n",
                   instance, addr, entry->src_id, entry->ring_id, entry->vmid,
@@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
 {
-       dev_err(adev->dev, "MC or SEM address in VM hole\n");
+       dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
        sdma_v4_0_print_iv_entry(adev, entry);
        return 0;
 }
@@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
 {
-       dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+       dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
        sdma_v4_0_print_iv_entry(adev, entry);
        return 0;
 }
@@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
 {
-       dev_err(adev->dev,
+       dev_dbg_ratelimited(adev->dev,
                "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
        sdma_v4_0_print_iv_entry(adev, entry);
        return 0;
@@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
 {
-       dev_err(adev->dev,
+       dev_dbg_ratelimited(adev->dev,
                "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
        sdma_v4_0_print_iv_entry(adev, entry);
        return 0;
@@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
 
 static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
 {
+       adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+       adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+       /* For Arcturus and Aldebaran, add four more IRQ handlers */
        switch (adev->sdma.num_instances) {
-       case 1:
-               adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
-               adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
-               break;
        case 5:
-               adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
-               adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
-               break;
        case 8:
-               adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
-               adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
-               adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
-               adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
-               adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
-               adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+               adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+               adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+               adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+               adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
                break;
-       case 2:
        default:
-               adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
-               adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
                break;
        }
        adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
index 920fc6d4a1273b2470c9fbe071e10385dbe3183b..04c68a79eca46c4251b849af3a548b2f5b976c65 100644 (file)
@@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
 
-       /* Invalidate L2, because if we don't do it, we might get stale cache
-        * lines from previous IBs.
-        */
-       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
-       amdgpu_ring_write(ring, 0);
-       amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
-                                SDMA_GCR_GL2_WB |
-                                SDMA_GCR_GLM_INV |
-                                SDMA_GCR_GLM_WB) << 16);
-       amdgpu_ring_write(ring, 0xffffff80);
-       amdgpu_ring_write(ring, 0xffff);
-
        /* An IB packet must end on a 8 DW boundary--the next dword
         * must be on a 8-dword boundary. Our IB packet below is 6
         * dwords long, thus add x number of NOPs, such that, in
@@ -437,6 +425,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
 }
 
+/**
+ * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * flush the IB by graphics cache rinse.
+ */
+static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+       uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB |
+                           SDMA_GCR_GLM_INV | SDMA_GCR_GL1_INV |
+                           SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+                           SDMA_GCR_GLI_INV(1);
+
+       /* Flush the entire L0/L1/L2 cache; this can be optimized based on performance requirements */
+       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+       amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+       amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+                       SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+       amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+                       SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+       amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+                       SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
 /**
  * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
  *
@@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
                10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
        .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
        .emit_ib = sdma_v5_0_ring_emit_ib,
+       .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
        .emit_fence = sdma_v5_0_ring_emit_fence,
        .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
        .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
index b1ad9e52b2347e7c0d2e082a749b906acfe9bc4c..7c4e0586e26d1d9b34b0714d909ddb89bf1b0939 100644 (file)
@@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
        int i;
 
        for (i = 0; i < adev->sdma.num_instances; i++) {
+
+               if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
+                       adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
+
                if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
                        /* Enable sdma clock gating */
                        def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
@@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
        int i;
 
        for (i = 0; i < adev->sdma.num_instances; i++) {
+
+               if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
+                       adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
+
                if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
                        /* Enable sdma mem light sleep */
                        def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
index 079b094c48ad7fdd0a173c30a80b1d0b65f2234f..3c47c94846d6d11685b07a606fb9bff3f9358184 100644 (file)
@@ -88,6 +88,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
        return die_id;
 }
 
+/**
+ * smuio_v13_0_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
+{
+       u32 data, socket_id;
+
+       data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+       socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+       return socket_id;
+}
+
 /**
  * smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s.
  *
@@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
        .get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
        .get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
        .get_die_id = smuio_v13_0_get_die_id,
+       .get_socket_id = smuio_v13_0_get_socket_id,
        .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
        .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
        .get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
index 8e1b9a40839fcd847c944a5fb530fa558b5a41b2..4b660b2d1c229dabb470994970d4153ce36363b3 100644 (file)
@@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
        int ret = 0;
 
        /* avoid NBIF got stuck when do RAS recovery in BACO reset */
-       if (ras && ras->supported)
+       if (ras && adev->ras_enabled)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
        ret = amdgpu_dpm_baco_reset(adev);
@@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
                return ret;
 
        /* re-enable doorbell interrupt after BACO exit */
-       if (ras && ras->supported)
+       if (ras && adev->ras_enabled)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
        return 0;
@@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
                 * 1. PMFW version > 0x284300: all cases use baco
                 * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
                 */
-               if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+               if (ras && adev->ras_enabled &&
+                   adev->pm.fw_version <= 0x283400)
                        baco_reset = false;
                break;
        case CHIP_ALDEBARAN:
@@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
 
 static void soc15_program_aspm(struct amdgpu_device *adev)
 {
-       if (amdgpu_aspm != 1)
+       if (!amdgpu_aspm)
                return;
 
        if (!(adev->flags & AMD_IS_APU) &&
@@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle)
        if (amdgpu_sriov_vf(adev))
                xgpu_ai_mailbox_get_irq(adev);
 
-       if (adev->hdp.funcs->reset_ras_error_count)
-               adev->hdp.funcs->reset_ras_error_count(adev);
-
        if (adev->nbio.ras_funcs &&
            adev->nbio.ras_funcs->ras_late_init)
                r = adev->nbio.ras_funcs->ras_late_init(adev);
index 745ed0fba1ed9e56acfe70ea66b3f258ef8eb2f0..0f214a398dd8fd6d108463489367ca73bf05a0f3 100644 (file)
@@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input {
        uint64_t                value;                  // method if error injection. i.e persistent, coherent etc.
 };
 
+struct ta_ras_init_flags
+{
+    uint8_t     poison_mode_en;
+    uint8_t     dgpu_mode;
+};
+
 struct ta_ras_output_flags
 {
        uint8_t    ras_init_success_flag;
@@ -115,6 +121,7 @@ struct ta_ras_output_flags
 /* Common input structure for RAS callbacks */
 /**********************************************************/
 union ta_ras_cmd_input {
+       struct ta_ras_init_flags                init_flags;
        struct ta_ras_enable_features_input     enable_features;
        struct ta_ras_disable_features_input    disable_features;
        struct ta_ras_trigger_error_input       trigger_error;
index ca8efa5c6978d5e6aa15dc7dd5578518f99c2b32..2f17c8a5701599cf0f92eba534a18d8350556778 100644 (file)
@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
 
        tmp = RREG32(ih_regs->ih_rb_cntl);
        tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
        /* enable_intr field is only valid in ring0 */
        if (ih == &adev->irq.ih)
                tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
index 735ebbd1148fd1160abefa7dd071dd7dd0a0a7be..3d21c07990373f0083ae75aa87c4dbe140cc1fee 100644 (file)
@@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
        bool bL1SS = false;
        bool bClkReqSupport = true;
 
-       if (amdgpu_aspm != 1)
+       if (!amdgpu_aspm)
                return;
 
        if (adev->flags & AMD_IS_APU ||
index f02c938f75dad1b712a1f8a8818ef889bf2fc7c7..8cc0a76ddf9fa40136f4fd260bdaeb699851dec7 100644 (file)
@@ -12,3 +12,16 @@ config HSA_AMD
        select DRM_AMDGPU_USERPTR
        help
          Enable this if you want to use HSA features on AMD GPU devices.
+
+config HSA_AMD_SVM
+       bool "Enable HMM-based shared virtual memory manager"
+       depends on HSA_AMD && DEVICE_PRIVATE
+       default y
+       select HMM_MIRROR
+       select MMU_NOTIFIER
+       help
+         Enable this to use unified memory and managed memory in HIP. This
+         memory manager supports two modes of operation. One based on
+         preemptions and one based on page faults. To enable page fault
+         based memory management on most GFXv9 GPUs, set the module
+         parameter amdgpu.noretry=0.
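A minimal sketch of setting that module parameter (assuming a standard modprobe.d setup; the file name below is illustrative, not taken from this series):

    # /etc/modprobe.d/amdgpu.conf (hypothetical file name)
    # Enable retry faults so the page-fault-based SVM mode is usable on GFXv9 GPUs
    options amdgpu noretry=0

Booting with amdgpu.noretry=0 on the kernel command line should have the same effect.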
index e1e4115dcf782008d682860177792a4819caf485..c4f3aff110728c858b76376cee668de45518c664 100644 (file)
@@ -63,3 +63,8 @@ endif
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
 endif
+
+ifneq ($(CONFIG_HSA_AMD_SVM),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
+               $(AMDKFD_PATH)/kfd_migrate.o
+endif
index 43de260b2230878346215a744877ac3580472394..059c3f1ca27d533367e399155beffd729d144944 100644 (file)
@@ -38,6 +38,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
+#include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
@@ -1297,7 +1298,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                dev->kgd, args->va_addr, args->size,
-               pdd->vm, (struct kgd_mem **) &mem, &offset,
+               pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
                flags);
 
        if (err)
@@ -1328,7 +1329,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
        return 0;
 
 err_free:
-       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
+       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+                                              pdd->drm_priv, NULL);
 err_unlock:
        mutex_unlock(&p->mutex);
        return err;
@@ -1365,7 +1367,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
        }
 
        ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
-                                               (struct kgd_mem *)mem, &size);
+                               (struct kgd_mem *)mem, pdd->drm_priv, &size);
 
        /* If freeing the buffer failed, leave the handle in place for
         * clean-up during process tear-down.
@@ -1448,7 +1450,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
                        goto get_mem_obj_from_handle_failed;
                }
                err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-                       peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+                       peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
                if (err) {
                        pr_err("Failed to map to gpu %d/%d\n",
                               i, args->n_devices);
@@ -1555,7 +1557,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
                        goto get_mem_obj_from_handle_failed;
                }
                err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-                       peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+                       peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
                if (err) {
                        pr_err("Failed to unmap from gpu %d/%d\n",
                               i, args->n_devices);
@@ -1701,7 +1703,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
        }
 
        r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
-                                             args->va_addr, pdd->vm,
+                                             args->va_addr, pdd->drm_priv,
                                              (struct kgd_mem **)&mem, &size,
                                              NULL);
        if (r)
@@ -1721,7 +1723,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
        return 0;
 
 err_free:
-       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
+       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
+                                              pdd->drm_priv, NULL);
 err_unlock:
        mutex_unlock(&p->mutex);
        dma_buf_put(dmabuf);
@@ -1742,6 +1745,64 @@ static int kfd_ioctl_smi_events(struct file *filep,
        return kfd_smi_event_open(dev, &args->anon_fd);
 }
 
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+                                   struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_set_xnack_mode_args *args = data;
+       int r = 0;
+
+       mutex_lock(&p->mutex);
+       if (args->xnack_enabled >= 0) {
+               if (!list_empty(&p->pqm.queues)) {
+                       pr_debug("Process has user queues running\n");
+                       mutex_unlock(&p->mutex);
+                       return -EBUSY;
+               }
+               if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
+                       r = -EPERM;
+               else
+                       p->xnack_enabled = args->xnack_enabled;
+       } else {
+               args->xnack_enabled = p->xnack_enabled;
+       }
+       mutex_unlock(&p->mutex);
+
+       return r;
+}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_svm_args *args = data;
+       int r = 0;
+
+       if (p->svm_disabled)
+               return -EPERM;
+
+       pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+                args->start_addr, args->size, args->op, args->nattr);
+
+       if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+               return -EINVAL;
+       if (!args->start_addr || !args->size)
+               return -EINVAL;
+
+       mutex_lock(&p->mutex);
+
+       r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+                     args->attrs);
+
+       mutex_unlock(&p->mutex);
+
+       return r;
+}
+#else
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+       return -EPERM;
+}
+#endif
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
                            .cmd_drv = 0, .name = #ioctl}
@@ -1840,6 +1901,11 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
                        kfd_ioctl_smi_events, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+                       kfd_ioctl_set_xnack_mode, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
index c60e82697385d3d65870b67720fe7139510b919b..c1815b708ac89ef06aeebbd4c35c6c5678040880 100644 (file)
@@ -55,7 +55,7 @@ struct kfd_gpu_cache_info {
        uint32_t        cache_level;
        uint32_t        flags;
        /* Indicates how many Compute Units share this cache
-        * Value = 1 indicates the cache is not shared
+        * within an SA. Value = 1 indicates the cache is not shared
         */
        uint32_t        num_cu_shared;
 };
@@ -69,32 +69,456 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache (in SQC module) per bank */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache (in SQC module) per bank */
+               .cache_size = 8,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
 
+       /* TODO: Add L2 Cache information */
+};
+
+
+static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache (in SQC module) per bank */
+               .cache_size = 8,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 4,
+       },
+       {
+               /* Scalar L1 Data Cache (in SQC module) per bank. */
+               .cache_size = 4,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 4,
+       },
+
+       /* TODO: Add L2 Cache information */
+};
+
+#define hawaii_cache_info kaveri_cache_info
+#define tonga_cache_info carrizo_cache_info
+#define fiji_cache_info  carrizo_cache_info
+#define polaris10_cache_info carrizo_cache_info
+#define polaris11_cache_info carrizo_cache_info
+#define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
+
+/* NOTE: L1 cache information has been updated and L2/L3
+ * cache information has been added for Vega10 and
+ * newer ASICs. The unit for cache_size is KiB.
+ * In the future, the cache details need to be checked
+ * and updated for every new ASIC.
+ */
+
+static struct kfd_gpu_cache_info vega10_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 16,
+       },
+};
+
+static struct kfd_gpu_cache_info raven_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 11,
+       },
+};
+
+static struct kfd_gpu_cache_info renoir_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
+
+static struct kfd_gpu_cache_info vega12_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 5,
+       },
+};
+
+static struct kfd_gpu_cache_info vega20_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 3,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 8192,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 16,
+       },
+};
+
+static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 8192,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 14,
+       },
+};
+
+static struct kfd_gpu_cache_info navi10_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+};
+
+static struct kfd_gpu_cache_info vangogh_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
        },
        {
-               /* Scalar L1 Instruction Cache (in SQC module) per bank */
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 1024,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
+
+static struct kfd_gpu_cache_info navi14_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
                .cache_size = 16,
                .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_INST_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 2,
        },
        {
-               /* Scalar L1 Data Cache (in SQC module) per bank */
-               .cache_size = 8,
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
                .num_cu_shared = 2,
        },
-
-       /* TODO: Add L2 Cache information */
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 12,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 12,
+       },
 };
 
-
-static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
        {
                /* TCP L1 Cache per CU */
                .cache_size = 16,
@@ -105,44 +529,165 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
                .num_cu_shared = 1,
        },
        {
-               /* Scalar L1 Instruction Cache (in SQC module) per bank */
-               .cache_size = 8,
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_INST_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 4,
+               .num_cu_shared = 2,
        },
        {
-               /* Scalar L1 Data Cache (in SQC module) per bank. */
-               .cache_size = 4,
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
                .cache_level = 1,
                .flags = (CRAT_CACHE_FLAGS_ENABLED |
                                CRAT_CACHE_FLAGS_DATA_CACHE |
                                CRAT_CACHE_FLAGS_SIMD_CACHE),
-               .num_cu_shared = 4,
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 4096,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
        },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 128*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+};
 
-       /* TODO: Add L2 Cache information */
+static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 3072,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 96*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 10,
+       },
 };
 
-/* NOTE: In future if more information is added to struct kfd_gpu_cache_info
- * the following ASICs may need a separate table.
- */
-#define hawaii_cache_info kaveri_cache_info
-#define tonga_cache_info carrizo_cache_info
-#define fiji_cache_info  carrizo_cache_info
-#define polaris10_cache_info carrizo_cache_info
-#define polaris11_cache_info carrizo_cache_info
-#define polaris12_cache_info carrizo_cache_info
-#define vegam_cache_info carrizo_cache_info
-/* TODO - check & update Vega10 cache details */
-#define vega10_cache_info carrizo_cache_info
-#define raven_cache_info carrizo_cache_info
-#define renoir_cache_info carrizo_cache_info
-/* TODO - check & update Navi10 cache details */
-#define navi10_cache_info carrizo_cache_info
-#define vangogh_cache_info carrizo_cache_info
+static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
+       {
+               /* TCP L1 Cache per CU */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 1,
+       },
+       {
+               /* Scalar L1 Instruction Cache per SQC */
+               .cache_size = 32,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_INST_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* Scalar L1 Data Cache per SQC */
+               .cache_size = 16,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 2,
+       },
+       {
+               /* GL1 Data Cache per SA */
+               .cache_size = 128,
+               .cache_level = 1,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L2 Data Cache per GPU (Total Tex Cache) */
+               .cache_size = 2048,
+               .cache_level = 2,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+       {
+               /* L3 Data Cache per GPU */
+               .cache_size = 32*1024,
+               .cache_level = 3,
+               .flags = (CRAT_CACHE_FLAGS_ENABLED |
+                               CRAT_CACHE_FLAGS_DATA_CACHE |
+                               CRAT_CACHE_FLAGS_SIMD_CACHE),
+               .num_cu_shared = 8,
+       },
+};
 
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
                struct crat_subtype_computeunit *cu)
@@ -544,7 +1089,7 @@ err:
 }
 
 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_pcache(struct crat_subtype_cache *pcache,
+static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
                                struct kfd_gpu_cache_info *pcache_info,
                                struct kfd_cu_info *cu_info,
                                int mem_available,
@@ -597,6 +1142,70 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
        return 1;
 }
 
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
+                               struct kfd_gpu_cache_info *pcache_info,
+                               struct kfd_cu_info *cu_info,
+                               int mem_available,
+                               int cache_type, unsigned int cu_processor_id)
+{
+       unsigned int cu_sibling_map_mask;
+       int first_active_cu;
+       int i, j, k;
+
+       /* First check if enough memory is available */
+       if (sizeof(struct crat_subtype_cache) > mem_available)
+               return -ENOMEM;
+
+       cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+       cu_sibling_map_mask &=
+               ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+       first_active_cu = ffs(cu_sibling_map_mask);
+
+       /* The CU could be inactive. For a shared cache, find the first active
+        * CU; for a non-shared cache, check whether the CU is inactive and,
+        * if so, skip it.
+        */
+       if (first_active_cu) {
+               memset(pcache, 0, sizeof(struct crat_subtype_cache));
+               pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+               pcache->length = sizeof(struct crat_subtype_cache);
+               pcache->flags = pcache_info[cache_type].flags;
+               pcache->processor_id_low = cu_processor_id
+                                        + (first_active_cu - 1);
+               pcache->cache_level = pcache_info[cache_type].cache_level;
+               pcache->cache_size = pcache_info[cache_type].cache_size;
+
+               /* Sibling map is w.r.t processor_id_low, so shift out
+                * inactive CU
+                */
+               cu_sibling_map_mask =
+                       cu_sibling_map_mask >> (first_active_cu - 1);
+               k = 0;
+               for (i = 0; i < cu_info->num_shader_engines; i++) {
+                       for (j = 0; j < cu_info->num_shader_arrays_per_engine;
+                               j++) {
+                               pcache->sibling_map[k] =
+                                (uint8_t)(cu_sibling_map_mask & 0xFF);
+                               pcache->sibling_map[k+1] =
+                                (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+                               pcache->sibling_map[k+2] =
+                                (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+                               pcache->sibling_map[k+3] =
+                                (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+                               k += 4;
+                               cu_sibling_map_mask =
+                                       cu_info->cu_bitmap[i % 4][j + i / 4];
+                               cu_sibling_map_mask &= (
+                                (1 << pcache_info[cache_type].num_cu_shared)
+                                - 1);
+                       }
+               }
+               return 0;
+       }
+       return 1;
+}
+
 /* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
  * tables
  *
@@ -624,6 +1233,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
        int mem_available = available_size;
        unsigned int cu_processor_id;
        int ret;
+       unsigned int num_cu_shared;
 
        switch (kdev->device_info->asic_family) {
        case CHIP_KAVERI:
@@ -663,12 +1273,21 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
                num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
                break;
        case CHIP_VEGA10:
+               pcache_info = vega10_cache_info;
+               num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+               break;
        case CHIP_VEGA12:
+               pcache_info = vega12_cache_info;
+               num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+               break;
        case CHIP_VEGA20:
        case CHIP_ARCTURUS:
+               pcache_info = vega20_cache_info;
+               num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+               break;
        case CHIP_ALDEBARAN:
-               pcache_info = vega10_cache_info;
-               num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+               pcache_info = aldebaran_cache_info;
+               num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
                break;
        case CHIP_RAVEN:
                pcache_info = raven_cache_info;
@@ -680,12 +1299,24 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
                break;
        case CHIP_NAVI10:
        case CHIP_NAVI12:
+               pcache_info = navi10_cache_info;
+               num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+               break;
        case CHIP_NAVI14:
+               pcache_info = navi14_cache_info;
+               num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+               break;
        case CHIP_SIENNA_CICHLID:
+               pcache_info = sienna_cichlid_cache_info;
+               num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+               break;
        case CHIP_NAVY_FLOUNDER:
+               pcache_info = navy_flounder_cache_info;
+               num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+               break;
        case CHIP_DIMGREY_CAVEFISH:
-               pcache_info = navi10_cache_info;
-               num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+               pcache_info = dimgrey_cavefish_cache_info;
+               num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
                break;
        case CHIP_VANGOGH:
                pcache_info = vangogh_cache_info;
@@ -709,40 +1340,58 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
         */
 
        for (ct = 0; ct < num_of_cache_types; ct++) {
-               cu_processor_id = gpu_processor_id;
-               for (i = 0; i < cu_info->num_shader_engines; i++) {
-                       for (j = 0; j < cu_info->num_shader_arrays_per_engine;
-                               j++) {
-                               for (k = 0; k < cu_info->num_cu_per_sh;
-                                       k += pcache_info[ct].num_cu_shared) {
-
-                                       ret = fill_in_pcache(pcache,
-                                               pcache_info,
-                                               cu_info,
-                                               mem_available,
-                                               cu_info->cu_bitmap[i % 4][j + i / 4],
-                                               ct,
-                                               cu_processor_id,
-                                               k);
-
-                                       if (ret < 0)
-                                               break;
-
-                                       if (!ret) {
-                                               pcache++;
-                                               (*num_of_entries)++;
-                                               mem_available -=
-                                                       sizeof(*pcache);
-                                               (*size_filled) +=
-                                                       sizeof(*pcache);
-                                       }
-
-                                       /* Move to next CU block */
-                                       cu_processor_id +=
-                                               pcache_info[ct].num_cu_shared;
-                               }
-                       }
+         cu_processor_id = gpu_processor_id;
+         if (pcache_info[ct].cache_level == 1) {
+           for (i = 0; i < cu_info->num_shader_engines; i++) {
+             for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+               for (k = 0; k < cu_info->num_cu_per_sh;
+                 k += pcache_info[ct].num_cu_shared) {
+                 ret = fill_in_l1_pcache(pcache,
+                                       pcache_info,
+                                       cu_info,
+                                       mem_available,
+                                       cu_info->cu_bitmap[i % 4][j + i / 4],
+                                       ct,
+                                       cu_processor_id,
+                                       k);
+
+                 if (ret < 0)
+                       break;
+
+                 if (!ret) {
+                               pcache++;
+                               (*num_of_entries)++;
+                               mem_available -= sizeof(*pcache);
+                               (*size_filled) += sizeof(*pcache);
+                 }
+
+                 /* Move to next CU block */
+                 num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+                                       cu_info->num_cu_per_sh) ?
+                                       pcache_info[ct].num_cu_shared :
+                                       (cu_info->num_cu_per_sh - k);
+                 cu_processor_id += num_cu_shared;
                }
+             }
+           }
+         } else {
+                       ret = fill_in_l2_l3_pcache(pcache,
+                               pcache_info,
+                               cu_info,
+                               mem_available,
+                               ct,
+                               cu_processor_id);
+
+                       if (ret < 0)
+                               break;
+
+                       if (!ret) {
+                               pcache++;
+                               (*num_of_entries)++;
+                               mem_available -= sizeof(*pcache);
+                               (*size_filled) += sizeof(*pcache);
+                       }
+         }
        }
 
        pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
@@ -1100,6 +1749,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
        return 0;
 }
 
+#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+       struct acpi_table_header *table_header = NULL;
+       struct acpi_subtable_header *sub_header = NULL;
+       unsigned long table_end, subtable_len;
+       u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+                       pci_dev_id(kdev->pdev);
+       u32 bdf;
+       acpi_status status;
+       struct acpi_srat_cpu_affinity *cpu;
+       struct acpi_srat_generic_affinity *gpu;
+       int pxm = 0, max_pxm = 0;
+       int numa_node = NUMA_NO_NODE;
+       bool found = false;
+
+       /* Fetch the SRAT table from ACPI */
+       status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+       if (status == AE_NOT_FOUND) {
+               pr_warn("SRAT table not found\n");
+               return;
+       } else if (ACPI_FAILURE(status)) {
+               const char *err = acpi_format_exception(status);
+               pr_err("SRAT table error: %s\n", err);
+               return;
+       }
+
+       table_end = (unsigned long)table_header + table_header->length;
+
+       /* Parse all entries looking for a match. */
+       sub_header = (struct acpi_subtable_header *)
+                       ((unsigned long)table_header +
+                       sizeof(struct acpi_table_srat));
+       subtable_len = sub_header->length;
+
+       while (((unsigned long)sub_header) + subtable_len  < table_end) {
+               /*
+                * If length is 0, break from this loop to avoid
+                * infinite loop.
+                */
+               if (subtable_len == 0) {
+                       pr_err("SRAT invalid zero length\n");
+                       break;
+               }
+
+               switch (sub_header->type) {
+               case ACPI_SRAT_TYPE_CPU_AFFINITY:
+                       cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+                       pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+                                       cpu->proximity_domain_lo;
+                       if (pxm > max_pxm)
+                               max_pxm = pxm;
+                       break;
+               case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+                       gpu = (struct acpi_srat_generic_affinity *)sub_header;
+                       bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+                                       *((u16 *)(&gpu->device_handle[2]));
+                       if (bdf == pci_id) {
+                               found = true;
+                               numa_node = pxm_to_node(gpu->proximity_domain);
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               if (found)
+                       break;
+
+               sub_header = (struct acpi_subtable_header *)
+                               ((unsigned long)sub_header + subtable_len);
+               subtable_len = sub_header->length;
+       }
+
+       acpi_put_table(table_header);
+
+       /* Workaround bad cpu-gpu binding case */
+       if (found && (numa_node < 0 ||
+                       numa_node > pxm_to_node(max_pxm)))
+               numa_node = 0;
+
+       if (numa_node != NUMA_NO_NODE)
+               set_dev_node(&kdev->pdev->dev, numa_node);
+}
+#endif
+
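The SRAT lookup above matches a 32-bit id built from the PCI segment and bus/devfn against the generic-affinity device handle; a sketch of that encoding, assuming the handle stores the segment in its first 16-bit word and the bus/devfn in the second, as the parse implies, with example values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint16_t segment = 0x0000;		/* pci_domain_nr() (assumed) */
    	uint8_t bus = 0x43, devfn = 0x00;	/* example BDF (assumed) */
    	uint32_t pci_id = (uint32_t)segment << 16 | (uint32_t)bus << 8 | devfn;

    	/* device_handle[0..1] = segment, device_handle[2..3] = bus/devfn (assumed) */
    	uint16_t handle[2] = { segment, (uint16_t)(bus << 8 | devfn) };
    	uint32_t bdf = (uint32_t)handle[0] << 16 | handle[1];

    	printf("pci_id 0x%08x, srat handle 0x%08x, match %d\n",
    	       (unsigned)pci_id, (unsigned)bdf, pci_id == bdf);
    	return 0;
    }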
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *     @avail_size: Available size in the memory
@@ -1140,11 +1875,17 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
                 */
                sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
                sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+               sub_type_hdr->num_hops_xgmi = 1;
        } else {
                sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
        }
 
        sub_type_hdr->proximity_domain_from = proximity_domain;
+
+#ifdef CONFIG_ACPI_NUMA
+       if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+               kfd_find_numa_node_in_srat(kdev);
+#endif
 #ifdef CONFIG_NUMA
        if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
                sub_type_hdr->proximity_domain_to = 0;
index 357b9bf62a1cfc272dcc963bf9f3cc4211c0155c..dedb8e33b953321bb81c8eb474550954884efb61 100644 (file)
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_headers_aldebaran.h"
 #include "cwsr_trap_handler.h"
 #include "kfd_iommu.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
+#include "kfd_migrate.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -576,7 +578,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
        [CHIP_VEGA20] = {&vega20_device_info, NULL},
        [CHIP_RENOIR] = {&renoir_device_info, NULL},
        [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
-       [CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL},
+       [CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
        [CHIP_NAVI10] = {&navi10_device_info, NULL},
        [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
        [CHIP_NAVI14] = {&navi14_device_info, NULL},
@@ -697,7 +699,9 @@ static int kfd_gws_init(struct kfd_dev *kfd)
                        && kfd->device_info->asic_family <= CHIP_RAVEN
                        && kfd->mec2_fw_version >= 0x1b3)
                || (kfd->device_info->asic_family == CHIP_ARCTURUS
-                       && kfd->mec2_fw_version >= 0x30))
+                       && kfd->mec2_fw_version >= 0x30)
+               || (kfd->device_info->asic_family == CHIP_ALDEBARAN
+                       && kfd->mec2_fw_version >= 0x28))
                ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
                                amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
 
@@ -713,7 +717,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                         struct drm_device *ddev,
                         const struct kgd2kfd_shared_resources *gpu_resources)
 {
-       unsigned int size;
+       unsigned int size, map_process_packet_size;
 
        kfd->ddev = ddev;
        kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
@@ -748,7 +752,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
         * calculate max size of runlist packet.
         * There can be only 2 packets at once
         */
-       size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
+       map_process_packet_size =
+                       kfd->device_info->asic_family == CHIP_ALDEBARAN ?
+                               sizeof(struct pm4_mes_map_process_aldebaran) :
+                                       sizeof(struct pm4_mes_map_process);
+       size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
                max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
                + sizeof(struct pm4_mes_runlist)) * 2;
 
@@ -814,6 +822,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
        kfd_cwsr_init(kfd);
 
+       svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+
        if (kfd_resume(kfd))
                goto kfd_resume_error;
 
@@ -862,6 +872,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
        if (kfd->init_complete) {
                kgd2kfd_suspend(kfd, false);
+               svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
                device_queue_manager_uninit(kfd->dqm);
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
index d3eaa1549bd784f0f9486e2826c502d6906e49d5..98c2046c733127fd7ababd122d8d52c8f946fb78 100644 (file)
@@ -738,7 +738,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 
        pdd = qpd_to_pdd(qpd);
        /* Retrieve PD base */
-       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 
        dqm_lock(dqm);
        if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@@ -821,7 +821,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 
        pdd = qpd_to_pdd(qpd);
        /* Retrieve PD base */
-       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 
        dqm_lock(dqm);
        if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
@@ -873,7 +873,7 @@ static int register_process(struct device_queue_manager *dqm,
 
        pdd = qpd_to_pdd(qpd);
        /* Retrieve PD base */
-       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
+       pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 
        dqm_lock(dqm);
        list_add(&n->list, &dqm->queues);
index eca6331efa9495df52cef4f919745825adf415ab..b5c3d13643f135bef97cb3a490f9f2c74db406d4 100644 (file)
@@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
                qpd->sh_mem_config =
                                SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
-               if (dqm->dev->noretry &&
-                   !dqm->dev->use_iommu_v2)
+
+               if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) {
+                       /* Aldebaran can safely support different XNACK modes
+                        * per process
+                        */
+                       if (!pdd->process->xnack_enabled)
+                               qpd->sh_mem_config |=
+                                       1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+               } else if (dqm->dev->noretry &&
+                          !dqm->dev->use_iommu_v2) {
                        qpd->sh_mem_config |=
                                1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+               }
 
                qpd->sh_mem_ape1_limit = 0;
                qpd->sh_mem_ape1_base = 0;
index a2c9063076cc2e91759dd159f5917b14575a252c..72815e86a3b8e20706ecb165e7eb1825e2486e1b 100644 (file)
@@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
                        case CHIP_POLARIS12:
                        case CHIP_VEGAM:
                                kfd_init_apertures_vi(pdd, id);
+                               /* VI GPUs cannot support SVM with only
+                                * 40 bits of virtual address space.
+                                */
+                               process->svm_disabled = true;
                                break;
                        case CHIP_VEGA10:
                        case CHIP_VEGA12:
index 696944fa0177edb9d199f8561025af16b80b5511..97c36e3c8c80e5ef5d74a9050435015ae690aa6a 100644 (file)
@@ -25,7 +25,6 @@
 #include "soc15_int.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_smi_events.h"
-#include "amdgpu.h"
 
 enum SQ_INTERRUPT_WORD_ENCODING {
        SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644 (file)
index 0000000..fd8f544
--- /dev/null
@@ -0,0 +1,937 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+       return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+                    dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_job *job;
+       unsigned int num_dw, num_bytes;
+       struct dma_fence *fence;
+       uint64_t src_addr, dst_addr;
+       uint64_t pte_flags;
+       void *cpu_addr;
+       int r;
+
+       /* use gart window 0 */
+       *gart_addr = adev->gmc.gart_start;
+
+       num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+       num_bytes = npages * 8;
+
+       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+                                    AMDGPU_IB_POOL_DELAYED, &job);
+       if (r)
+               return r;
+
+       src_addr = num_dw * 4;
+       src_addr += job->ibs[0].gpu_addr;
+
+       dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+       amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+                               dst_addr, num_bytes, false);
+
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       WARN_ON(job->ibs[0].length_dw > num_dw);
+
+       pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+       pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+       if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+               pte_flags |= AMDGPU_PTE_WRITEABLE;
+       pte_flags |= adev->gart.gart_pte_flags;
+
+       cpu_addr = &job->ibs[0].ptr[num_dw];
+
+       r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+       if (r)
+               goto error_free;
+
+       r = amdgpu_job_submit(job, &adev->mman.entity,
+                             AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+       if (r)
+               goto error_free;
+
+       dma_fence_put(fence);
+
+       return r;
+
+error_free:
+       amdgpu_job_free(job);
+       return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring is running on
+ * @src: source page address array
+ * @dst: destination page address array
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after sdma is done
+ *
+ * The ram address uses contiguous GART table entries mapped to ram pages;
+ * the vram address uses a direct mapping of vram pages, which must be
+ * npages contiguous pages.
+ * The GART update and the sdma copy share the same buffer-copy ring. The sdma
+ * copy is split into multiple GTT_MAX_PAGES transfers and all sdma operations
+ * are serialized, so waiting on the fence of the last transfer is enough to
+ * know the whole copy is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+                            uint64_t *vram, uint64_t npages,
+                            enum MIGRATION_COPY_DIR direction,
+                            struct dma_fence **mfence)
+{
+       const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       uint64_t gart_s, gart_d;
+       struct dma_fence *next;
+       uint64_t size;
+       int r;
+
+       mutex_lock(&adev->mman.gtt_window_lock);
+
+       while (npages) {
+               size = min(GTT_MAX_PAGES, npages);
+
+               if (direction == FROM_VRAM_TO_RAM) {
+                       gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+                       r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+               } else if (direction == FROM_RAM_TO_VRAM) {
+                       r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+                                                KFD_IOCTL_SVM_FLAG_GPU_RO);
+                       gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+               }
+               if (r) {
+                       pr_debug("failed %d to create gart mapping\n", r);
+                       goto out_unlock;
+               }
+
+               r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+                                      NULL, &next, false, true, false);
+               if (r) {
+                       pr_debug("failed %d to copy memory\n", r);
+                       goto out_unlock;
+               }
+
+               dma_fence_put(*mfence);
+               *mfence = next;
+               npages -= size;
+               if (npages) {
+                       sys += size;
+                       vram += size;
+               }
+       }
+
+out_unlock:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for the sdma memory copy to finish
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for the dma fence to be signaled. If the copy was split into multiple
+ * sdma operations, this is the fence of the last sdma operation.
+ *
+ * Context: called after svm_migrate_copy_memory
+ *
+ * Return:
+ * 0           - success
+ * otherwise   - error code from dma fence signal
+ */
+static int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+       int r = 0;
+
+       if (mfence) {
+               r = dma_fence_wait(mfence, false);
+               dma_fence_put(mfence);
+               pr_debug("sdma copy memory fence done\n");
+       }
+
+       return r;
+}
+
+static uint64_t
+svm_migrate_node_physical_addr(struct amdgpu_device *adev,
+                              struct drm_mm_node **mm_node, uint64_t *offset)
+{
+       struct drm_mm_node *node = *mm_node;
+       uint64_t pos = *offset;
+
+       if (node->start == AMDGPU_BO_INVALID_OFFSET) {
+               pr_debug("drm node is not validated\n");
+               return 0;
+       }
+
+       pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
+                node->size);
+
+       if (pos >= node->size) {
+               do  {
+                       pos -= node->size;
+                       node++;
+               } while (pos >= node->size);
+
+               *mm_node = node;
+               *offset = pos;
+       }
+
+       return (node->start + pos) << PAGE_SHIFT;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+       return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+       struct page *page;
+
+       page = pfn_to_page(pfn);
+       page->zone_device_data = prange;
+       get_page(page);
+       lock_page(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+       struct page *page;
+
+       page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+       unlock_page(page);
+       put_page(page);
+}
+
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+       unsigned long addr;
+
+       addr = page_to_pfn(page) << PAGE_SHIFT;
+       return (addr - adev->kfd.dev->pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+       struct page *page;
+
+       page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+       if (page)
+               lock_page(page);
+
+       return page;
+}
+
+static void svm_migrate_put_sys_page(unsigned long addr)
+{
+       struct page *page;
+
+       page = pfn_to_page(addr >> PAGE_SHIFT);
+       unlock_page(page);
+       put_page(page);
+}
+
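The helpers above translate between a VRAM offset and a device-private pfn by offsetting with pgmap.range.start; a sketch of the round trip with an assumed range start:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
    	uint64_t range_start = 0x100000000ULL;	/* pgmap.range.start (assumed) */
    	uint64_t vram_addr = 0x200000;		/* offset within VRAM (assumed) */

    	/* svm_migrate_addr_to_pfn(): VRAM offset -> device page pfn */
    	uint64_t pfn = (vram_addr + range_start) >> PAGE_SHIFT;
    	/* svm_migrate_addr(): device page pfn -> VRAM offset */
    	uint64_t back = (pfn << PAGE_SHIFT) - range_start;

    	printf("vram 0x%llx -> pfn 0x%llx -> vram 0x%llx\n",
    	       (unsigned long long)vram_addr, (unsigned long long)pfn,
    	       (unsigned long long)back);
    	return 0;
    }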
+static int
+svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+                        struct migrate_vma *migrate, struct dma_fence **mfence,
+                        dma_addr_t *scratch)
+{
+       uint64_t npages = migrate->cpages;
+       struct device *dev = adev->dev;
+       struct drm_mm_node *node;
+       dma_addr_t *src;
+       uint64_t *dst;
+       uint64_t vram_addr;
+       uint64_t offset;
+       uint64_t i, j;
+       int r;
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+                prange->last);
+
+       src = scratch;
+       dst = (uint64_t *)(scratch + npages);
+
+       r = svm_range_vram_node_new(adev, prange, true);
+       if (r) {
+               pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+               goto out;
+       }
+
+       node = prange->ttm_res->mm_node;
+       offset = prange->offset;
+       vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
+       if (!vram_addr) {
+               WARN_ONCE(1, "vram node address is 0\n");
+               r = -ENOMEM;
+               goto out;
+       }
+
+       for (i = j = 0; i < npages; i++) {
+               struct page *spage;
+
+               dst[i] = vram_addr + (j << PAGE_SHIFT);
+               migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+               svm_migrate_get_vram_page(prange, migrate->dst[i]);
+
+               migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+               migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+
+               if (migrate->src[i] & MIGRATE_PFN_VALID) {
+                       spage = migrate_pfn_to_page(migrate->src[i]);
+                       src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
+                                             DMA_TO_DEVICE);
+                       r = dma_mapping_error(dev, src[i]);
+                       if (r) {
+                               pr_debug("failed %d dma_map_page\n", r);
+                               goto out_free_vram_pages;
+                       }
+               } else {
+                       if (j) {
+                               r = svm_migrate_copy_memory_gart(
+                                               adev, src + i - j,
+                                               dst + i - j, j,
+                                               FROM_RAM_TO_VRAM,
+                                               mfence);
+                               if (r)
+                                       goto out_free_vram_pages;
+                               offset += j;
+                               vram_addr = (node->start + offset) << PAGE_SHIFT;
+                               j = 0;
+                       } else {
+                               offset++;
+                               vram_addr += PAGE_SIZE;
+                       }
+                       if (offset >= node->size) {
+                               node++;
+                               pr_debug("next node size 0x%llx\n", node->size);
+                               vram_addr = node->start << PAGE_SHIFT;
+                               offset = 0;
+                       }
+                       continue;
+               }
+
+               pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
+                        src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+
+               if (j + offset >= node->size - 1 && i < npages - 1) {
+                       r = svm_migrate_copy_memory_gart(adev, src + i - j,
+                                                        dst + i - j, j + 1,
+                                                        FROM_RAM_TO_VRAM,
+                                                        mfence);
+                       if (r)
+                               goto out_free_vram_pages;
+
+                       node++;
+                       pr_debug("next node size 0x%llx\n", node->size);
+                       vram_addr = node->start << PAGE_SHIFT;
+                       offset = 0;
+                       j = 0;
+               } else {
+                       j++;
+               }
+       }
+
+       r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
+                                        FROM_RAM_TO_VRAM, mfence);
+
+out_free_vram_pages:
+       if (r) {
+               pr_debug("failed %d to copy memory to vram\n", r);
+               while (i--) {
+                       svm_migrate_put_vram_page(adev, dst[i]);
+                       migrate->dst[i] = 0;
+               }
+       }
+
+out:
+       return r;
+}
+
+static int
+svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+                       struct vm_area_struct *vma, uint64_t start,
+                       uint64_t end)
+{
+       uint64_t npages = (end - start) >> PAGE_SHIFT;
+       struct dma_fence *mfence = NULL;
+       struct migrate_vma migrate;
+       dma_addr_t *scratch;
+       size_t size;
+       void *buf;
+       int r = -ENOMEM;
+       int retry = 0;
+
+       memset(&migrate, 0, sizeof(migrate));
+       migrate.vma = vma;
+       migrate.start = start;
+       migrate.end = end;
+       migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
+       migrate.pgmap_owner = adev;
+
+       size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
+       size *= npages;
+       buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+       if (!buf)
+               goto out;
+
+       migrate.src = buf;
+       migrate.dst = migrate.src + npages;
+       scratch = (dma_addr_t *)(migrate.dst + npages);
+
+retry:
+       r = migrate_vma_setup(&migrate);
+       if (r) {
+               pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
+                        r, prange->svms, prange->start, prange->last);
+               goto out_free;
+       }
+       if (migrate.cpages != npages) {
+               pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
+                        npages);
+               migrate_vma_finalize(&migrate);
+               if (retry++ >= 3) {
+                       r = -ENOMEM;
+                       pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
+                                r, prange->svms, prange->start, prange->last);
+                       goto out_free;
+               }
+
+               goto retry;
+       }
+
+       if (migrate.cpages) {
+               r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
+                                            scratch);
+               migrate_vma_pages(&migrate);
+               svm_migrate_copy_done(adev, mfence);
+               migrate_vma_finalize(&migrate);
+       }
+
+       svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+       svm_range_free_dma_mappings(prange);
+
+out_free:
+       kvfree(buf);
+out:
+       return r;
+}
+
+/**
+ * svm_migrate_ram_to_vram - migrate svm range from system to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: the process mm structure
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+                       struct mm_struct *mm)
+{
+       unsigned long addr, start, end;
+       struct vm_area_struct *vma;
+       struct amdgpu_device *adev;
+       int r = 0;
+
+       if (prange->actual_loc == best_loc) {
+               pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
+                        prange->svms, prange->start, prange->last, best_loc);
+               return 0;
+       }
+
+       adev = svm_range_get_adev_by_id(prange, best_loc);
+       if (!adev) {
+               pr_debug("failed to get device by id 0x%x\n", best_loc);
+               return -ENODEV;
+       }
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
+                prange->start, prange->last, best_loc);
+
+       /* FIXME: workaround for page locking bug with invalid pages */
+       svm_range_prefault(prange, mm);
+
+       start = prange->start << PAGE_SHIFT;
+       end = (prange->last + 1) << PAGE_SHIFT;
+
+       for (addr = start; addr < end;) {
+               unsigned long next;
+
+               vma = find_vma(mm, addr);
+               if (!vma || addr < vma->vm_start)
+                       break;
+
+               next = min(vma->vm_end, end);
+               r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
+               if (r) {
+                       pr_debug("failed to migrate\n");
+                       break;
+               }
+               addr = next;
+       }
+
+       if (!r)
+               prange->actual_loc = best_loc;
+
+       return r;
+}
+
+static void svm_migrate_page_free(struct page *page)
+{
+       /* Keep this function to avoid warning */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+                       struct migrate_vma *migrate, struct dma_fence **mfence,
+                       dma_addr_t *scratch)
+{
+       uint64_t npages = migrate->cpages;
+       struct device *dev = adev->dev;
+       uint64_t *src;
+       dma_addr_t *dst;
+       struct page *dpage;
+       uint64_t i = 0, j;
+       uint64_t addr;
+       int r = 0;
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+                prange->last);
+
+       addr = prange->start << PAGE_SHIFT;
+
+       src = (uint64_t *)(scratch + npages);
+       dst = scratch;
+
+       for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+               struct page *spage;
+
+               spage = migrate_pfn_to_page(migrate->src[i]);
+               if (!spage) {
+                       pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+                                prange->svms, prange->start, prange->last);
+                       r = -ENOMEM;
+                       goto out_oom;
+               }
+               src[i] = svm_migrate_addr(adev, spage);
+               if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+                       r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+                                                        src + i - j, j,
+                                                        FROM_VRAM_TO_RAM,
+                                                        mfence);
+                       if (r)
+                               goto out_oom;
+                       j = 0;
+               }
+
+               dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+               if (!dpage) {
+                       pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+                                prange->svms, prange->start, prange->last);
+                       r = -ENOMEM;
+                       goto out_oom;
+               }
+
+               dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+               r = dma_mapping_error(dev, dst[i]);
+               if (r) {
+                       pr_debug("failed %d dma_map_page\n", r);
+                       goto out_oom;
+               }
+
+               pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
+                             dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+
+               migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+               migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+       }
+
+       r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
+                                        FROM_VRAM_TO_RAM, mfence);
+
+out_oom:
+       if (r) {
+               pr_debug("failed %d copy to ram\n", r);
+               while (i--) {
+                       svm_migrate_put_sys_page(dst[i]);
+                       migrate->dst[i] = 0;
+               }
+       }
+
+       return r;
+}
+
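The loop in svm_migrate_copy_to_ram above batches VRAM source pages into contiguous runs and only issues an sdma copy when adjacency breaks; a user-space sketch of that run detection with assumed addresses and a placeholder copy helper:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096ULL

    /* stand-in for svm_migrate_copy_memory_gart() (hypothetical helper) */
    static void copy_run(const uint64_t *src, uint64_t n)
    {
    	if (n)
    		printf("copy run of %llu pages from 0x%llx\n",
    		       (unsigned long long)n, (unsigned long long)src[0]);
    }

    int main(void)
    {
    	/* example VRAM source addresses: two contiguous runs (assumed) */
    	uint64_t src[] = { 0x1000, 0x2000, 0x3000, 0x9000, 0xA000 };
    	uint64_t npages = 5, i, j;

    	for (i = 0, j = 0; i < npages; i++, j++) {
    		if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
    			copy_run(src + i - j, j);	/* flush pending run */
    			j = 0;
    		}
    	}
    	copy_run(src + i - j, j);	/* flush the final run */
    	return 0;
    }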
+static int
+svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+                      struct vm_area_struct *vma, uint64_t start, uint64_t end)
+{
+       uint64_t npages = (end - start) >> PAGE_SHIFT;
+       struct dma_fence *mfence = NULL;
+       struct migrate_vma migrate;
+       dma_addr_t *scratch;
+       size_t size;
+       void *buf;
+       int r = -ENOMEM;
+
+       memset(&migrate, 0, sizeof(migrate));
+       migrate.vma = vma;
+       migrate.start = start;
+       migrate.end = end;
+       migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+       migrate.pgmap_owner = adev;
+
+       size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
+       size *= npages;
+       buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+       if (!buf)
+               goto out;
+
+       migrate.src = buf;
+       migrate.dst = migrate.src + npages;
+       scratch = (dma_addr_t *)(migrate.dst + npages);
+
+       r = migrate_vma_setup(&migrate);
+       if (r) {
+               pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
+                        r, prange->svms, prange->start, prange->last);
+               goto out_free;
+       }
+
+       pr_debug("cpages %ld\n", migrate.cpages);
+
+       if (migrate.cpages) {
+               r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
+                                           scratch);
+               migrate_vma_pages(&migrate);
+               svm_migrate_copy_done(adev, mfence);
+               migrate_vma_finalize(&migrate);
+       } else {
+               pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
+                        prange->start, prange->last);
+       }
+
+       svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+
+out_free:
+       kvfree(buf);
+out:
+       return r;
+}
+
+/**
+ * svm_migrate_vram_to_ram - migrate svm range from device to system
+ * @prange: range structure
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+{
+       struct amdgpu_device *adev;
+       struct vm_area_struct *vma;
+       unsigned long addr;
+       unsigned long start;
+       unsigned long end;
+       int r = 0;
+
+       if (!prange->actual_loc) {
+               pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
+                        prange->start, prange->last);
+               return 0;
+       }
+
+       adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+       if (!adev) {
+               pr_debug("failed to get device by id 0x%x\n",
+                        prange->actual_loc);
+               return -ENODEV;
+       }
+
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
+                prange->svms, prange, prange->start, prange->last,
+                prange->actual_loc);
+
+       start = prange->start << PAGE_SHIFT;
+       end = (prange->last + 1) << PAGE_SHIFT;
+
+       for (addr = start; addr < end;) {
+               unsigned long next;
+
+               vma = find_vma(mm, addr);
+               if (!vma || addr < vma->vm_start)
+                       break;
+
+               next = min(vma->vm_end, end);
+               r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+               if (r) {
+                       pr_debug("failed %d to migrate\n", r);
+                       break;
+               }
+               addr = next;
+       }
+
+       if (!r) {
+               svm_range_vram_node_free(prange);
+               prange->actual_loc = 0;
+       }
+       return r;
+}
+
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller holds mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+                        struct mm_struct *mm)
+{
+       int r;
+
+       /*
+        * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+        * system memory as migration bridge
+        */
+
+       pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+       r = svm_migrate_vram_to_ram(prange, mm);
+       if (r)
+               return r;
+
+       return svm_migrate_ram_to_vram(prange, best_loc, mm);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+                   struct mm_struct *mm)
+{
+       if  (!prange->actual_loc)
+               return svm_migrate_ram_to_vram(prange, best_loc, mm);
+       else
+               return svm_migrate_vram_to_vram(prange, best_loc, mm);
+
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, caller holds the mmap read lock
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notify the application of the page fault with SIGBUS
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+       unsigned long addr = vmf->address;
+       struct vm_area_struct *vma;
+       enum svm_work_list_ops op;
+       struct svm_range *parent;
+       struct svm_range *prange;
+       struct kfd_process *p;
+       struct mm_struct *mm;
+       int r = 0;
+
+       vma = vmf->vma;
+       mm = vma->vm_mm;
+
+       p = kfd_lookup_process_by_mm(vma->vm_mm);
+       if (!p) {
+               pr_debug("failed find process at fault address 0x%lx\n", addr);
+               return VM_FAULT_SIGBUS;
+       }
+       addr >>= PAGE_SHIFT;
+       pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+
+       mutex_lock(&p->svms.lock);
+
+       prange = svm_range_from_addr(&p->svms, addr, &parent);
+       if (!prange) {
+               pr_debug("cannot find svm range at 0x%lx\n", addr);
+               r = -EFAULT;
+               goto out;
+       }
+
+       mutex_lock(&parent->migrate_mutex);
+       if (prange != parent)
+               mutex_lock_nested(&prange->migrate_mutex, 1);
+
+       if (!prange->actual_loc)
+               goto out_unlock_prange;
+
+       svm_range_lock(parent);
+       if (prange != parent)
+               mutex_lock_nested(&prange->lock, 1);
+       r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
+       if (prange != parent)
+               mutex_unlock(&prange->lock);
+       svm_range_unlock(parent);
+       if (r) {
+               pr_debug("failed %d to split range by granularity\n", r);
+               goto out_unlock_prange;
+       }
+
+       r = svm_migrate_vram_to_ram(prange, mm);
+       if (r)
+               pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
+                        prange, prange->start, prange->last);
+
+       /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+       if (p->xnack_enabled && parent == prange)
+               op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+       else
+               op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+       svm_range_add_list_work(&p->svms, parent, mm, op);
+       schedule_deferred_list_work(&p->svms);
+
+out_unlock_prange:
+       if (prange != parent)
+               mutex_unlock(&prange->migrate_mutex);
+       mutex_unlock(&parent->migrate_mutex);
+out:
+       mutex_unlock(&p->svms.lock);
+       kfd_unref_process(p);
+
+       pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
+
+       return r ? VM_FAULT_SIGBUS : 0;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+       .page_free              = svm_migrate_page_free,
+       .migrate_to_ram         = svm_migrate_to_ram,
+};
+
+/* Each VRAM page uses sizeof(struct page) on system memory */
+#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+
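A worked example of the struct page overhead computed by SVM_HMM_PAGE_STRUCT_SIZE, assuming 16 GiB of VRAM, 4 KiB pages and a 64-byte struct page:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint64_t vram_size = 16ULL << 30;	/* 16 GiB VRAM (assumed) */
    	uint64_t page_size = 4096;		/* PAGE_SIZE (assumed) */
    	uint64_t page_struct = 64;		/* sizeof(struct page) (assumed) */
    	uint64_t overhead = vram_size / page_size * page_struct;

    	/* 16 GiB / 4 KiB = 4M pages, 4M * 64 B = 256 MiB of system memory */
    	printf("reserve %llu MiB for VRAM page structs\n",
    	       (unsigned long long)(overhead >> 20));
    	return 0;
    }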
+int svm_migrate_init(struct amdgpu_device *adev)
+{
+       struct kfd_dev *kfddev = adev->kfd.dev;
+       struct dev_pagemap *pgmap;
+       struct resource *res;
+       unsigned long size;
+       void *r;
+
+       /* Page migration works on Vega10 or newer */
+       if (kfddev->device_info->asic_family < CHIP_VEGA10)
+               return -EINVAL;
+
+       pgmap = &kfddev->pgmap;
+       memset(pgmap, 0, sizeof(*pgmap));
+
+       /* TODO: all vram is registered with HMM for now;
+        * the reserved size should be excluded.
+        */
+       size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
+       res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+       if (IS_ERR(res))
+               return -ENOMEM;
+
+       pgmap->type = MEMORY_DEVICE_PRIVATE;
+       pgmap->nr_range = 1;
+       pgmap->range.start = res->start;
+       pgmap->range.end = res->end;
+       pgmap->ops = &svm_migrate_pgmap_ops;
+       pgmap->owner = adev;
+       pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+       r = devm_memremap_pages(adev->dev, pgmap);
+       if (IS_ERR(r)) {
+               pr_err("failed to register HMM device memory\n");
+               devm_release_mem_region(adev->dev, res->start,
+                                       res->end - res->start + 1);
+               return PTR_ERR(r);
+       }
+
+       pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
+                SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
+
+       amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+
+       pr_info("HMM registered %ldMB device memory\n", size >> 20);
+
+       return 0;
+}
+
+void svm_migrate_fini(struct amdgpu_device *adev)
+{
+       struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;
+
+       devm_memunmap_pages(adev->dev, pgmap);
+       devm_release_mem_region(adev->dev, pgmap->range.start,
+                               pgmap->range.end - pgmap->range.start + 1);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
new file mode 100644 (file)
index 0000000..0de76b5
--- /dev/null
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MIGRATE_H_
+#define KFD_MIGRATE_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+
+enum MIGRATION_COPY_DIR {
+       FROM_RAM_TO_VRAM = 0,
+       FROM_VRAM_TO_RAM
+};
+
+int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+                       struct mm_struct *mm);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
+
+int svm_migrate_init(struct amdgpu_device *adev);
+void svm_migrate_fini(struct amdgpu_device *adev);
+
+#else
+
+static inline int svm_migrate_init(struct amdgpu_device *adev)
+{
+       return 0;
+}
+static inline void svm_migrate_fini(struct amdgpu_device *adev)
+{
+       /* empty */
+}
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_MIGRATE_H_ */
index e840dd581719c06967077410cc1746e5aae86149..f688451cb299f2396c1b275186895e81c1caa101 100644 (file)
@@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 {
        unsigned int alloc_size_bytes;
        unsigned int *rl_buffer, rl_wptr, i;
-       int retval, proccesses_mapped;
+       int retval, processes_mapped;
        struct device_process_node *cur;
        struct qcm_process_device *qpd;
        struct queue *q;
        struct kernel_queue *kq;
        bool is_over_subscription;
 
-       rl_wptr = retval = proccesses_mapped = 0;
+       rl_wptr = retval = processes_mapped = 0;
 
        retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
                                &alloc_size_bytes, &is_over_subscription);
@@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
        list_for_each_entry(cur, queues, list) {
                qpd = cur->qpd;
                /* build map process packet */
-               if (proccesses_mapped >= pm->dqm->processes_count) {
+               if (processes_mapped >= pm->dqm->processes_count) {
                        pr_debug("Not enough space left in runlist IB\n");
                        pm_release_ib(pm);
                        return -ENOMEM;
@@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                if (retval)
                        return retval;
 
-               proccesses_mapped++;
+               processes_mapped++;
                inc_wptr(&rl_wptr, pm->pmf->map_process_size,
                                alloc_size_bytes);
 
@@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
        case CHIP_RAVEN:
        case CHIP_RENOIR:
        case CHIP_ARCTURUS:
-       case CHIP_ALDEBARAN:
        case CHIP_NAVI10:
        case CHIP_NAVI12:
        case CHIP_NAVI14:
@@ -252,6 +251,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
        case CHIP_DIMGREY_CAVEFISH:
                pm->pmf = &kfd_v9_pm_funcs;
                break;
+       case CHIP_ALDEBARAN:
+               pm->pmf = &kfd_aldebaran_pm_funcs;
+               break;
        default:
                WARN(1, "Unexpected ASIC family %u",
                     dqm->dev->device_info->asic_family);
index e3ba0cd3b6fa717966e179acf6b2e36e621ddc21..7ea3f671b3253877ca2e4d1554e31c2d6564fff9 100644 (file)
@@ -24,6 +24,7 @@
 #include "kfd_kernel_queue.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_headers_aldebaran.h"
 #include "kfd_pm4_opcodes.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 
@@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm,
 
        packet = (struct pm4_mes_map_process *)buffer;
        memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
        packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
                                        sizeof(struct pm4_mes_map_process));
        packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
@@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm,
        return 0;
 }
 
+static int pm_map_process_aldebaran(struct packet_manager *pm,
+               uint32_t *buffer, struct qcm_process_device *qpd)
+{
+       struct pm4_mes_map_process_aldebaran *packet;
+       uint64_t vm_page_table_base_addr = qpd->page_table_base;
+
+       packet = (struct pm4_mes_map_process_aldebaran *)buffer;
+       memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
+       packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+                       sizeof(struct pm4_mes_map_process_aldebaran));
+       packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+       packet->bitfields2.process_quantum = 10;
+       packet->bitfields2.pasid = qpd->pqm->process->pasid;
+       packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+       packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+       packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
+       packet->bitfields14.num_oac = qpd->num_oac;
+       packet->bitfields14.sdma_enable = 1;
+       packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+       packet->sh_mem_config = qpd->sh_mem_config;
+       packet->sh_mem_bases = qpd->sh_mem_bases;
+       if (qpd->tba_addr) {
+               packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+               packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+               packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+       }
+
+       packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+       packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+       packet->vm_context_page_table_base_addr_lo32 =
+                       lower_32_bits(vm_page_table_base_addr);
+       packet->vm_context_page_table_base_addr_hi32 =
+                       upper_32_bits(vm_page_table_base_addr);
+
+       return 0;
+}
+
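In the MAP_PROCESS packet built above, the 10-bit gds_size value is split across two bitfields; a sketch of the split and reassembly with an assumed value:

    #include <stdio.h>

    int main(void)
    {
    	unsigned int gds_size = 0x1A5;			/* example 10-bit value (assumed) */
    	unsigned int lo = gds_size & 0x3F;		/* bitfields14.gds_size, low 6 bits */
    	unsigned int hi = (gds_size >> 6) & 0xF;	/* bitfields14.gds_size_hi, high 4 bits */
    	unsigned int reassembled = hi << 6 | lo;

    	printf("gds_size 0x%x -> lo 0x%x hi 0x%x -> 0x%x\n",
    	       gds_size, lo, hi, reassembled);
    	return 0;
    }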
 static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
                        uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
@@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
        .query_status_size      = sizeof(struct pm4_mes_query_status),
        .release_mem_size       = 0,
 };
+
+const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
+       .map_process            = pm_map_process_aldebaran,
+       .runlist                = pm_runlist_v9,
+       .set_resources          = pm_set_resources_v9,
+       .map_queues             = pm_map_queues_v9,
+       .unmap_queues           = pm_unmap_queues_v9,
+       .query_status           = pm_query_status_v9,
+       .release_mem            = NULL,
+       .map_process_size       = sizeof(struct pm4_mes_map_process_aldebaran),
+       .runlist_size           = sizeof(struct pm4_mes_runlist),
+       .set_resources_size     = sizeof(struct pm4_mes_set_resources),
+       .map_queues_size        = sizeof(struct pm4_mes_map_queues),
+       .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
+       .query_status_size      = sizeof(struct pm4_mes_query_status),
+       .release_mem_size       = 0,
+};
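
The only Aldebaran-specific handler in the table above is pm_map_process_aldebaran(); the other entries reuse the v9 packet builders. One detail from that handler worth spelling out is how the 10-bit GDS size is split across two MAP_PROCESS bitfields: the low 6 bits go into gds_size and the next 4 bits into gds_size_hi. A minimal stand-alone sketch of that packing (plain user-space C with a hypothetical value, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t gds_size = 0x2ab;            /* hypothetical 10-bit GDS size */
	uint32_t lo = gds_size & 0x3F;        /* bits 0-5 -> bitfields14.gds_size    */
	uint32_t hi = (gds_size >> 6) & 0xF;  /* bits 6-9 -> bitfields14.gds_size_hi */

	assert(((hi << 6) | lo) == gds_size); /* hi:lo reassembles the original value */
	printf("gds_size=0x%x lo=0x%x hi=0x%x\n", gds_size, lo, hi);
	return 0;
}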
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
new file mode 100644 (file)
index 0000000..f795ec8
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+
+struct pm4_mes_map_process_aldebaran {
+       union {
+               union PM4_MES_TYPE_3_HEADER header;     /* header */
+               uint32_t ordinal1;
+       };
+
+       union {
+               struct {
+                       uint32_t pasid:16;          /* 0 - 15  */
+                       uint32_t single_memops:1;   /* 16      */
+                       uint32_t reserved1:1;       /* 17      */
+                       uint32_t debug_vmid:4;      /* 18 - 21 */
+                       uint32_t new_debug:1;       /* 22      */
+                       uint32_t tmz:1;             /* 23      */
+                       uint32_t diq_enable:1;      /* 24      */
+                       uint32_t process_quantum:7; /* 25 - 31 */
+               } bitfields2;
+               uint32_t ordinal2;
+       };
+
+       uint32_t vm_context_page_table_base_addr_lo32;
+
+       uint32_t vm_context_page_table_base_addr_hi32;
+
+       uint32_t sh_mem_bases;
+
+       uint32_t sh_mem_config;
+
+       uint32_t sq_shader_tba_lo;
+
+       uint32_t sq_shader_tba_hi;
+
+       uint32_t sq_shader_tma_lo;
+
+       uint32_t sq_shader_tma_hi;
+
+       uint32_t reserved6;
+
+       uint32_t gds_addr_lo;
+
+       uint32_t gds_addr_hi;
+
+       union {
+               struct {
+                       uint32_t num_gws:7;
+                       uint32_t sdma_enable:1;
+                       uint32_t num_oac:4;
+                       uint32_t gds_size_hi:4;
+                       uint32_t gds_size:6;
+                       uint32_t num_queues:10;
+               } bitfields14;
+               uint32_t ordinal14;
+       };
+
+       uint32_t spi_gdbg_per_vmid_cntl;
+
+       uint32_t tcp_watch_cntl[4];
+
+       uint32_t completion_signal_lo;
+
+       uint32_t completion_signal_hi;
+
+};
+
+#endif
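
As a sanity check on the ordinal2 layout above, the following stand-alone sketch (not part of the patch) fills the same bitfields the way pm_map_process_aldebaran() does and verifies the bit positions noted in the comments, assuming the LSB-first bitfield packing GCC uses on little-endian targets:

#include <assert.h>
#include <stdint.h>

/* Trimmed copy of ordinal2 from pm4_mes_map_process_aldebaran, for illustration only. */
union map_process_ordinal2 {
	struct {
		uint32_t pasid:16;          /* 0 - 15  */
		uint32_t single_memops:1;   /* 16      */
		uint32_t reserved1:1;       /* 17      */
		uint32_t debug_vmid:4;      /* 18 - 21 */
		uint32_t new_debug:1;       /* 22      */
		uint32_t tmz:1;             /* 23      */
		uint32_t diq_enable:1;      /* 24      */
		uint32_t process_quantum:7; /* 25 - 31 */
	} bitfields2;
	uint32_t ordinal2;
};

int main(void)
{
	union map_process_ordinal2 o2 = { .ordinal2 = 0 };

	o2.bitfields2.pasid = 0x8001;        /* hypothetical PASID */
	o2.bitfields2.diq_enable = 0;        /* qpd->is_debug == false */
	o2.bitfields2.process_quantum = 10;  /* same quantum the driver programs */

	assert(o2.ordinal2 == ((10u << 25) | 0x8001u));
	return 0;
}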
index 0b6595f7acdaae36be4e05539155a17a5fd69cdd..64552f6b8ba4c051c47c67e0a544b677aca06bc3 100644 (file)
@@ -322,6 +322,9 @@ struct kfd_dev {
        unsigned int max_doorbell_slices;
 
        int noretry;
+
+       /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+       struct dev_pagemap pgmap;
 };
 
 enum kfd_mempool {
@@ -669,7 +672,7 @@ struct kfd_process_device {
 
        /* VM context for GPUVM allocations */
        struct file *drm_file;
-       void *vm;
+       void *drm_priv;
 
        /* GPUVM allocations storage */
        struct idr alloc_idr;
@@ -731,6 +734,17 @@ struct kfd_process_device {
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
 
+struct svm_range_list {
+       struct mutex                    lock;
+       struct rb_root_cached           objects;
+       struct list_head                list;
+       struct work_struct              deferred_list_work;
+       struct list_head                deferred_range_list;
+       spinlock_t                      deferred_list_lock;
+       atomic_t                        evicted_ranges;
+       struct delayed_work             restore_work;
+};
+
 /* Process data */
 struct kfd_process {
        /*
@@ -809,6 +823,12 @@ struct kfd_process {
        struct kobject *kobj;
        struct kobject *kobj_queues;
        struct attribute attr_pasid;
+
+       /* shared virtual memory registered by this process */
+       struct svm_range_list svms;
+       bool svm_disabled;
+
+       bool xnack_enabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -842,6 +862,20 @@ struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+                              struct amdgpu_device *adev, uint32_t *gpuid,
+                              uint32_t *gpuidx);
+static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+                               uint32_t gpuidx, uint32_t *gpuid) {
+       return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
+}
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+                               struct kfd_process *p, uint32_t gpuidx) {
+       return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
@@ -857,6 +891,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                        struct kfd_process *p);
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
+
 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                          struct vm_area_struct *vma);
 
@@ -1052,6 +1088,7 @@ struct packet_manager_funcs {
 
 extern const struct packet_manager_funcs kfd_vi_pm_funcs;
 extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
 
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
 void pm_uninit(struct packet_manager *pm, bool hanging);
index d97e330a50221904cfe796f4763f1f970ae194d1..9d4f527bda7c4f1c4fd02027c082b5aa3be640bb 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include "amdgpu_amdkfd.h"
 #include "amdgpu.h"
+#include "kfd_svm.h"
 
 struct mm_struct;
 
@@ -42,6 +43,7 @@ struct mm_struct;
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 #include "kfd_iommu.h"
+#include "kfd_svm.h"
 
 /*
  * List of struct kfd_process (field kfd_process).
@@ -250,7 +252,7 @@ cleanup:
 }
 
 /**
- * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
+ * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device
  * by current process. Translates acquired wave count into number of compute units
  * that are occupied.
  *
@@ -647,8 +649,9 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
 {
        struct kfd_dev *dev = pdd->dev;
 
-       amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
-       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
+       amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
+       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
+                                              NULL);
 }
 
 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@@ -667,11 +670,11 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
        int err;
 
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
-                                                pdd->vm, &mem, NULL, flags);
+                                                pdd->drm_priv, &mem, NULL, flags);
        if (err)
                goto err_alloc_mem;
 
-       err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm);
+       err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
        if (err)
                goto err_map_mem;
 
@@ -712,7 +715,8 @@ sync_memory_failed:
        return err;
 
 err_map_mem:
-       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
+       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
+                                              NULL);
 err_alloc_mem:
        *kptr = NULL;
        return err;
@@ -901,13 +905,14 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
                for (i = 0; i < p->n_pdds; i++) {
                        struct kfd_process_device *peer_pdd = p->pdds[i];
 
-                       if (!peer_pdd->vm)
+                       if (!peer_pdd->drm_priv)
                                continue;
                        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-                               peer_pdd->dev->kgd, mem, peer_pdd->vm);
+                               peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
                }
 
-               amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
+               amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem,
+                                                      pdd->drm_priv, NULL);
                kfd_process_device_remove_obj_handle(pdd, id);
        }
 }
@@ -932,7 +937,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
                if (pdd->drm_file) {
                        amdgpu_amdkfd_gpuvm_release_process_vm(
-                                       pdd->dev->kgd, pdd->vm);
+                                       pdd->dev->kgd, pdd->drm_priv);
                        fput(pdd->drm_file);
                }
 
@@ -1000,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work)
        kfd_iommu_unbind_process(p);
 
        kfd_process_free_outstanding_kfd_bos(p);
+       svm_range_list_fini(p);
 
        kfd_process_destroy_pdds(p);
        dma_fence_put(p->ef);
@@ -1058,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
        cancel_delayed_work_sync(&p->eviction_work);
        cancel_delayed_work_sync(&p->restore_work);
+       cancel_delayed_work_sync(&p->svms.restore_work);
 
        mutex_lock(&p->mutex);
 
@@ -1186,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
        }
 }
 
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+       int i;
+
+       /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+        * boot time retry setting. Mixing processes with different
+        * XNACK/retry settings can hang the GPU.
+        *
+        * Different GPUs can have different noretry settings depending
+        * on HW bugs or limitations. We need to find at least one
+        * XNACK mode for this process that's compatible with all GPUs.
+        * Fortunately, GPUs with retry enabled (noretry=0) can run code
+        * built for XNACK-off, though on GFXv9 it may run more slowly.
+        *
+        * Therefore applications built for XNACK-off can always be
+        * supported and will be our fallback if any GPU does not
+        * support retry.
+        */
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_dev *dev = p->pdds[i]->dev;
+
+               /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+                * support the SVM APIs and don't need to be considered
+                * for the XNACK mode selection.
+                */
+               if (dev->device_info->asic_family < CHIP_VEGA10)
+                       continue;
+               /* Aldebaran can always support XNACK because it can support
+                * per-process XNACK mode selection. But let the dev->noretry
+                * setting still influence the default XNACK mode.
+                */
+               if (supported &&
+                   dev->device_info->asic_family == CHIP_ALDEBARAN)
+                       continue;
+
+               /* GFXv10 and later GPUs do not support shader preemption
+                * during page faults. This can lead to poor QoS for queue
+                * management and memory-manager-related preemptions or
+                * even deadlocks.
+                */
+               if (dev->device_info->asic_family >= CHIP_NAVI10)
+                       return false;
+
+               if (dev->noretry)
+                       return false;
+       }
+
+       return true;
+}
+
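
To make the per-GPU rules in kfd_process_xnack_mode() easier to follow, here is a toy user-space model of the same decision logic. The asic-generation enum and gpu struct are invented for this sketch and are not the kernel's types; it only mirrors the checks above for illustration:

#include <assert.h>
#include <stdbool.h>

enum gen { GEN_PRE_GFX9, GEN_GFX9, GEN_ALDEBARAN, GEN_GFX10_PLUS };

struct gpu { enum gen gen; bool noretry; };

static bool xnack_supported(const struct gpu *gpus, int n, bool per_process)
{
	for (int i = 0; i < n; i++) {
		if (gpus[i].gen == GEN_PRE_GFX9)
			continue;	/* no SVM support, does not constrain the mode */
		if (per_process && gpus[i].gen == GEN_ALDEBARAN)
			continue;	/* per-process XNACK selection, always fine */
		if (gpus[i].gen == GEN_GFX10_PLUS)
			return false;	/* no shader preemption during page faults */
		if (gpus[i].noretry)
			return false;	/* boot-time retry disabled on this GPU */
	}
	return true;			/* every GPU can run with XNACK/retry on */
}

int main(void)
{
	struct gpu mixed[] = {
		{ GEN_GFX9, false },	/* retry enabled */
		{ GEN_GFX9, true },	/* retry disabled -> forces XNACK off */
	};
	struct gpu aldebaran[] = { { GEN_ALDEBARAN, true } };

	assert(!xnack_supported(mixed, 2, false));
	assert(xnack_supported(aldebaran, 1, true));
	assert(!xnack_supported(aldebaran, 1, false));
	return 0;
}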
 /*
  * On return the kfd_process is fully operational and will be freed when the
  * mm is released
@@ -1205,6 +1262,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
        process->mm = thread->mm;
        process->lead_thread = thread->group_leader;
        process->n_pdds = 0;
+       process->svm_disabled = false;
        INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
        INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
        process->last_restore_timestamp = get_jiffies_64();
@@ -1224,6 +1282,13 @@ static struct kfd_process *create_process(const struct task_struct *thread)
        if (err != 0)
                goto err_init_apertures;
 
+       /* Check XNACK support after PDDs are created in kfd_init_apertures */
+       process->xnack_enabled = kfd_process_xnack_mode(process, false);
+
+       err = svm_range_list_init(process);
+       if (err)
+               goto err_init_svm_range_list;
+
        /* alloc_notifier needs to find the process in the hash table */
        hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                        (uintptr_t)process->mm);
@@ -1246,6 +1311,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 
 err_register_notifier:
        hash_del_rcu(&process->kfd_processes);
+       svm_range_list_fini(process);
+err_init_svm_range_list:
        kfd_process_free_outstanding_kfd_bos(process);
        kfd_process_destroy_pdds(process);
 err_init_apertures:
@@ -1375,7 +1442,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
        if (!drm_file)
                return -EINVAL;
 
-       if (pdd->vm)
+       if (pdd->drm_priv)
                return -EBUSY;
 
        p = pdd->process;
@@ -1383,13 +1450,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 
        ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
                dev->kgd, drm_file, p->pasid,
-               &pdd->vm, &p->kgd_process_info, &p->ef);
+               &p->kgd_process_info, &p->ef);
        if (ret) {
                pr_err("Failed to create process VM object\n");
                return ret;
        }
-
-       amdgpu_vm_set_task_info(pdd->vm);
+       pdd->drm_priv = drm_file->private_data;
 
        ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
@@ -1405,7 +1471,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 err_init_cwsr:
 err_reserve_ib_mem:
        kfd_process_device_free_bos(pdd);
-       pdd->vm = NULL;
+       pdd->drm_priv = NULL;
 
        return ret;
 }
@@ -1429,7 +1495,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                return ERR_PTR(-ENOMEM);
        }
 
-       if (!pdd->vm)
+       if (!pdd->drm_priv)
                return ERR_PTR(-ENODEV);
 
        /*
@@ -1600,6 +1666,32 @@ int kfd_process_restore_queues(struct kfd_process *p)
        return ret;
 }
 
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+       int i;
+
+       for (i = 0; i < p->n_pdds; i++)
+               if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+                       return i;
+       return -EINVAL;
+}
+
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+                          uint32_t *gpuid, uint32_t *gpuidx)
+{
+       struct kgd_dev *kgd = (struct kgd_dev *)adev;
+       int i;
+
+       for (i = 0; i < p->n_pdds; i++)
+               if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+                       *gpuid = p->pdds[i]->dev->id;
+                       *gpuidx = i;
+                       return 0;
+               }
+       return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
        int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
new file mode 100644 (file)
index 0000000..b665e9f
--- /dev/null
@@ -0,0 +1,3085 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched/task.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+                                   const struct mmu_notifier_range *range,
+                                   unsigned long cur_seq);
+
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+       .invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
+/**
+ * svm_range_unlink - unlink svm_range from lists and interval tree
+ * @prange: svm range structure to be removed
+ *
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_unlink(struct svm_range *prange)
+{
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                prange, prange->start, prange->last);
+
+       if (prange->svm_bo) {
+               spin_lock(&prange->svm_bo->list_lock);
+               list_del(&prange->svm_bo_list);
+               spin_unlock(&prange->svm_bo->list_lock);
+       }
+
+       list_del(&prange->list);
+       if (prange->it_node.start != 0 && prange->it_node.last != 0)
+               interval_tree_remove(&prange->it_node, &prange->svms->objects);
+}
+
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                prange, prange->start, prange->last);
+
+       mmu_interval_notifier_insert_locked(&prange->notifier, mm,
+                                    prange->start << PAGE_SHIFT,
+                                    prange->npages << PAGE_SHIFT,
+                                    &svm_range_mn_ops);
+}
+
+/**
+ * svm_range_add_to_svms - add svm range to svms
+ * @prange: svm range structure to be added
+ *
+ * Add the svm range to the svms interval tree and linked list
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_add_to_svms(struct svm_range *prange)
+{
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                prange, prange->start, prange->last);
+
+       list_add_tail(&prange->list, &prange->svms->list);
+       prange->it_node.start = prange->start;
+       prange->it_node.last = prange->last;
+       interval_tree_insert(&prange->it_node, &prange->svms->objects);
+}
+
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+       pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                prange->svms, prange,
+                prange->notifier.interval_tree.start >> PAGE_SHIFT,
+                prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+       if (prange->notifier.interval_tree.start != 0 &&
+           prange->notifier.interval_tree.last != 0)
+               mmu_interval_notifier_remove(&prange->notifier);
+}
+
+static int
+svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
+                     unsigned long *hmm_pfns, uint64_t npages)
+{
+       enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+       dma_addr_t *addr = *dma_addr;
+       struct page *page;
+       int i, r;
+
+       if (!addr) {
+               addr = kvmalloc_array(npages, sizeof(*addr),
+                                     GFP_KERNEL | __GFP_ZERO);
+               if (!addr)
+                       return -ENOMEM;
+               *dma_addr = addr;
+       }
+
+       for (i = 0; i < npages; i++) {
+               if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
+                             "leaking dma mapping\n"))
+                       dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+               page = hmm_pfn_to_page(hmm_pfns[i]);
+               addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+               r = dma_mapping_error(dev, addr[i]);
+               if (r) {
+                       pr_debug("failed %d dma_map_page\n", r);
+                       return r;
+               }
+               pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
+                        addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+       }
+       return 0;
+}
+
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+                 unsigned long *hmm_pfns)
+{
+       struct kfd_process *p;
+       uint32_t gpuidx;
+       int r;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               struct kfd_process_device *pdd;
+               struct amdgpu_device *adev;
+
+               pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+               pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
+                                         hmm_pfns, prange->npages);
+               if (r)
+                       break;
+       }
+
+       return r;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+                        unsigned long offset, unsigned long npages)
+{
+       enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+       int i;
+
+       if (!dma_addr)
+               return;
+
+       for (i = offset; i < offset + npages; i++) {
+               if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+                       continue;
+               pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+               dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+               dma_addr[i] = 0;
+       }
+}
+
+void svm_range_free_dma_mappings(struct svm_range *prange)
+{
+       struct kfd_process_device *pdd;
+       dma_addr_t *dma_addr;
+       struct device *dev;
+       struct kfd_process *p;
+       uint32_t gpuidx;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+               dma_addr = prange->dma_addr[gpuidx];
+               if (!dma_addr)
+                       continue;
+
+               pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       continue;
+               }
+               dev = &pdd->dev->pdev->dev;
+               svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+               kvfree(dma_addr);
+               prange->dma_addr[gpuidx] = NULL;
+       }
+}
+
+static void svm_range_free(struct svm_range *prange)
+{
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
+                prange->start, prange->last);
+
+       svm_range_vram_node_free(prange);
+       svm_range_free_dma_mappings(prange);
+       mutex_destroy(&prange->lock);
+       mutex_destroy(&prange->migrate_mutex);
+       kfree(prange);
+}
+
+static void
+svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
+                                uint8_t *granularity, uint32_t *flags)
+{
+       *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+       *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+       *granularity = 9;
+       *flags =
+               KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
+}
+
+static struct
+svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
+                        uint64_t last)
+{
+       uint64_t size = last - start + 1;
+       struct svm_range *prange;
+       struct kfd_process *p;
+
+       prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+       if (!prange)
+               return NULL;
+       prange->npages = size;
+       prange->svms = svms;
+       prange->start = start;
+       prange->last = last;
+       INIT_LIST_HEAD(&prange->list);
+       INIT_LIST_HEAD(&prange->update_list);
+       INIT_LIST_HEAD(&prange->remove_list);
+       INIT_LIST_HEAD(&prange->insert_list);
+       INIT_LIST_HEAD(&prange->svm_bo_list);
+       INIT_LIST_HEAD(&prange->deferred_list);
+       INIT_LIST_HEAD(&prange->child_list);
+       atomic_set(&prange->invalid, 0);
+       prange->validate_timestamp = 0;
+       mutex_init(&prange->migrate_mutex);
+       mutex_init(&prange->lock);
+
+       p = container_of(svms, struct kfd_process, svms);
+       if (p->xnack_enabled)
+               bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
+
+       svm_range_set_default_attributes(&prange->preferred_loc,
+                                        &prange->prefetch_loc,
+                                        &prange->granularity, &prange->flags);
+
+       pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
+
+       return prange;
+}
+
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+       if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
+               return false;
+
+       return true;
+}
+
+static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+       if (svm_bo)
+               kref_get(&svm_bo->kref);
+
+       return svm_bo;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+       struct svm_range_bo *svm_bo;
+
+       svm_bo = container_of(kref, struct svm_range_bo, kref);
+       spin_lock(&svm_bo->list_lock);
+       while (!list_empty(&svm_bo->range_list)) {
+               struct svm_range *prange =
+                               list_first_entry(&svm_bo->range_list,
+                                               struct svm_range, svm_bo_list);
+               /* list_del_init tells a concurrent svm_range_vram_node_new when
+                * it's safe to reuse the svm_bo pointer and svm_bo_list head.
+                */
+               list_del_init(&prange->svm_bo_list);
+               spin_unlock(&svm_bo->list_lock);
+
+               pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                        prange->start, prange->last);
+               mutex_lock(&prange->lock);
+               prange->svm_bo = NULL;
+               mutex_unlock(&prange->lock);
+
+               spin_lock(&svm_bo->list_lock);
+       }
+       spin_unlock(&svm_bo->list_lock);
+       if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
+               /* We're not in the eviction worker.
+                * Signal the fence and synchronize with any
+                * pending eviction work.
+                */
+               dma_fence_signal(&svm_bo->eviction_fence->base);
+               cancel_work_sync(&svm_bo->eviction_work);
+       }
+       dma_fence_put(&svm_bo->eviction_fence->base);
+       amdgpu_bo_unref(&svm_bo->bo);
+       kfree(svm_bo);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+       if (!svm_bo)
+               return;
+
+       kref_put(&svm_bo->kref, svm_range_bo_release);
+}
+
+static bool
+svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
+{
+       struct amdgpu_device *bo_adev;
+
+       mutex_lock(&prange->lock);
+       if (!prange->svm_bo) {
+               mutex_unlock(&prange->lock);
+               return false;
+       }
+       if (prange->ttm_res) {
+               /* We still have a reference, all is well */
+               mutex_unlock(&prange->lock);
+               return true;
+       }
+       if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+               /*
+                * Migrate from GPU to GPU, remove range from source bo_adev
+                * svm_bo range list, and return false to allocate svm_bo from
+                * destination adev.
+                */
+               bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+               if (bo_adev != adev) {
+                       mutex_unlock(&prange->lock);
+
+                       spin_lock(&prange->svm_bo->list_lock);
+                       list_del_init(&prange->svm_bo_list);
+                       spin_unlock(&prange->svm_bo->list_lock);
+
+                       svm_range_bo_unref(prange->svm_bo);
+                       return false;
+               }
+               if (READ_ONCE(prange->svm_bo->evicting)) {
+                       struct dma_fence *f;
+                       struct svm_range_bo *svm_bo;
+                       /* The BO is getting evicted,
+                        * we need to get a new one
+                        */
+                       mutex_unlock(&prange->lock);
+                       svm_bo = prange->svm_bo;
+                       f = dma_fence_get(&svm_bo->eviction_fence->base);
+                       svm_range_bo_unref(prange->svm_bo);
+                       /* wait for the fence to avoid long spin-loop
+                        * at list_empty_careful
+                        */
+                       dma_fence_wait(f, false);
+                       dma_fence_put(f);
+               } else {
+                       /* The BO was still around and we got
+                        * a new reference to it
+                        */
+                       mutex_unlock(&prange->lock);
+                       pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+                                prange->svms, prange->start, prange->last);
+
+                       prange->ttm_res = &prange->svm_bo->bo->tbo.mem;
+                       return true;
+               }
+
+       } else {
+               mutex_unlock(&prange->lock);
+       }
+
+       /* We need a new svm_bo. Spin-loop to wait for concurrent
+        * svm_range_bo_release to finish removing this range from
+        * its range list. After this, it is safe to reuse the
+        * svm_bo pointer and svm_bo_list head.
+        */
+       while (!list_empty_careful(&prange->svm_bo_list))
+               ;
+
+       return false;
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+       struct svm_range_bo *svm_bo;
+
+       svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
+       if (!svm_bo)
+               return NULL;
+
+       kref_init(&svm_bo->kref);
+       INIT_LIST_HEAD(&svm_bo->range_list);
+       spin_lock_init(&svm_bo->list_lock);
+
+       return svm_bo;
+}
+
+int
+svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
+                       bool clear)
+{
+       struct amdgpu_bo_param bp;
+       struct svm_range_bo *svm_bo;
+       struct amdgpu_bo_user *ubo;
+       struct amdgpu_bo *bo;
+       struct kfd_process *p;
+       struct mm_struct *mm;
+       int r;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+       pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+                prange->start, prange->last);
+
+       if (svm_range_validate_svm_bo(adev, prange))
+               return 0;
+
+       svm_bo = svm_range_bo_new();
+       if (!svm_bo) {
+               pr_debug("failed to alloc svm bo\n");
+               return -ENOMEM;
+       }
+       mm = get_task_mm(p->lead_thread);
+       if (!mm) {
+               pr_debug("failed to get mm\n");
+               kfree(svm_bo);
+               return -ESRCH;
+       }
+       svm_bo->svms = prange->svms;
+       svm_bo->eviction_fence =
+               amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+                                          mm,
+                                          svm_bo);
+       mmput(mm);
+       INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
+       svm_bo->evicting = 0;
+       memset(&bp, 0, sizeof(bp));
+       bp.size = prange->npages * PAGE_SIZE;
+       bp.byte_align = PAGE_SIZE;
+       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+       bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
+       bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
+       bp.type = ttm_bo_type_device;
+       bp.resv = NULL;
+
+       r = amdgpu_bo_create_user(adev, &bp, &ubo);
+       if (r) {
+               pr_debug("failed %d to create bo\n", r);
+               goto create_bo_failed;
+       }
+       bo = &ubo->bo;
+       r = amdgpu_bo_reserve(bo, true);
+       if (r) {
+               pr_debug("failed %d to reserve bo\n", r);
+               goto reserve_bo_failed;
+       }
+
+       r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+       if (r) {
+               pr_debug("failed %d to reserve bo\n", r);
+               amdgpu_bo_unreserve(bo);
+               goto reserve_bo_failed;
+       }
+       amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
+
+       amdgpu_bo_unreserve(bo);
+
+       svm_bo->bo = bo;
+       prange->svm_bo = svm_bo;
+       prange->ttm_res = &bo->tbo.mem;
+       prange->offset = 0;
+
+       spin_lock(&svm_bo->list_lock);
+       list_add(&prange->svm_bo_list, &svm_bo->range_list);
+       spin_unlock(&svm_bo->list_lock);
+
+       return 0;
+
+reserve_bo_failed:
+       amdgpu_bo_unref(&bo);
+create_bo_failed:
+       dma_fence_put(&svm_bo->eviction_fence->base);
+       kfree(svm_bo);
+       prange->ttm_res = NULL;
+
+       return r;
+}
+
+void svm_range_vram_node_free(struct svm_range *prange)
+{
+       svm_range_bo_unref(prange->svm_bo);
+       prange->ttm_res = NULL;
+}
+
+struct amdgpu_device *
+svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
+{
+       struct kfd_process_device *pdd;
+       struct kfd_process *p;
+       int32_t gpu_idx;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
+       if (gpu_idx < 0) {
+               pr_debug("failed to get device by id 0x%x\n", gpu_id);
+               return NULL;
+       }
+       pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+       if (!pdd) {
+               pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
+               return NULL;
+       }
+
+       return (struct amdgpu_device *)pdd->dev->kgd;
+}
+
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+       struct ttm_operation_ctx ctx = { false, false };
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+       return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_check_attr(struct kfd_process *p,
+                    uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+       uint32_t i;
+       int gpuidx;
+
+       for (i = 0; i < nattr; i++) {
+               switch (attrs[i].type) {
+               case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+                       if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
+                           attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+                           kfd_process_gpuidx_from_gpuid(p,
+                                                         attrs[i].value) < 0) {
+                               pr_debug("no GPU 0x%x found\n", attrs[i].value);
+                               return -EINVAL;
+                       }
+                       break;
+               case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+                       if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
+                           kfd_process_gpuidx_from_gpuid(p,
+                                                         attrs[i].value) < 0) {
+                               pr_debug("no GPU 0x%x found\n", attrs[i].value);
+                               return -EINVAL;
+                       }
+                       break;
+               case KFD_IOCTL_SVM_ATTR_ACCESS:
+               case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+               case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+                       gpuidx = kfd_process_gpuidx_from_gpuid(p,
+                                                              attrs[i].value);
+                       if (gpuidx < 0) {
+                               pr_debug("no GPU 0x%x found\n", attrs[i].value);
+                               return -EINVAL;
+                       }
+                       break;
+               case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+                       break;
+               case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+                       break;
+               case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+                       break;
+               default:
+                       pr_debug("unknown attr type 0x%x\n", attrs[i].type);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+static void
+svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
+                     uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+       uint32_t i;
+       int gpuidx;
+
+       for (i = 0; i < nattr; i++) {
+               switch (attrs[i].type) {
+               case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+                       prange->preferred_loc = attrs[i].value;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+                       prange->prefetch_loc = attrs[i].value;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_ACCESS:
+               case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+               case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+                       gpuidx = kfd_process_gpuidx_from_gpuid(p,
+                                                              attrs[i].value);
+                       if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+                               bitmap_clear(prange->bitmap_access, gpuidx, 1);
+                               bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+                       } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+                               bitmap_set(prange->bitmap_access, gpuidx, 1);
+                               bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+                       } else {
+                               bitmap_clear(prange->bitmap_access, gpuidx, 1);
+                               bitmap_set(prange->bitmap_aip, gpuidx, 1);
+                       }
+                       break;
+               case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+                       prange->flags |= attrs[i].value;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+                       prange->flags &= ~attrs[i].value;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+                       prange->granularity = attrs[i].value;
+                       break;
+               default:
+                       WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+               }
+       }
+}
+
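
The three access attributes above drive two per-GPU bitmaps: ACCESS sets the bit in bitmap_access and clears it in bitmap_aip, ACCESS_IN_PLACE does the opposite, and NO_ACCESS clears both. A toy stand-alone model of those transitions, using plain uint64_t words instead of the kernel bitmaps and enum names invented for the sketch:

#include <assert.h>
#include <stdint.h>

enum attr { ATTR_ACCESS, ATTR_ACCESS_IN_PLACE, ATTR_NO_ACCESS };

static void apply(enum attr a, unsigned gpuidx, uint64_t *access, uint64_t *aip)
{
	uint64_t bit = 1ULL << gpuidx;

	switch (a) {
	case ATTR_NO_ACCESS:        /* clear both bits */
		*access &= ~bit;
		*aip &= ~bit;
		break;
	case ATTR_ACCESS:           /* set access bit, clear in-place bit */
		*access |= bit;
		*aip &= ~bit;
		break;
	case ATTR_ACCESS_IN_PLACE:  /* clear access bit, set in-place bit */
		*access &= ~bit;
		*aip |= bit;
		break;
	}
}

int main(void)
{
	uint64_t access = 0, aip = 0;

	apply(ATTR_ACCESS, 0, &access, &aip);
	apply(ATTR_ACCESS_IN_PLACE, 1, &access, &aip);
	assert(access == 0x1 && aip == 0x2);

	apply(ATTR_NO_ACCESS, 1, &access, &aip);
	assert(access == 0x1 && aip == 0x0);
	return 0;
}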
+/**
+ * svm_range_debug_dump - print all range information from svms
+ * @svms: svm range list header
+ *
+ * Debug output of svm range start, end and prefetch location from the svms
+ * interval tree and linked list.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_debug_dump(struct svm_range_list *svms)
+{
+       struct interval_tree_node *node;
+       struct svm_range *prange;
+
+       pr_debug("dump svms 0x%p list\n", svms);
+       pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+
+       list_for_each_entry(prange, &svms->list, list) {
+               pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+                        prange, prange->start, prange->npages,
+                        prange->start + prange->npages - 1,
+                        prange->actual_loc);
+       }
+
+       pr_debug("dump svms 0x%p interval tree\n", svms);
+       pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+       node = interval_tree_iter_first(&svms->objects, 0, ~0ULL);
+       while (node) {
+               prange = container_of(node, struct svm_range, it_node);
+               pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+                        prange, prange->start, prange->npages,
+                        prange->start + prange->npages - 1,
+                        prange->actual_loc);
+               node = interval_tree_iter_next(node, 0, ~0ULL);
+       }
+}
+
+static bool
+svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new)
+{
+       return (old->prefetch_loc == new->prefetch_loc &&
+               old->flags == new->flags &&
+               old->granularity == new->granularity);
+}
+
+static int
+svm_range_split_array(void *ppnew, void *ppold, size_t size,
+                     uint64_t old_start, uint64_t old_n,
+                     uint64_t new_start, uint64_t new_n)
+{
+       unsigned char *new, *old, *pold;
+       uint64_t d;
+
+       if (!ppold)
+               return 0;
+       pold = *(unsigned char **)ppold;
+       if (!pold)
+               return 0;
+
+       new = kvmalloc_array(new_n, size, GFP_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       d = (new_start - old_start) * size;
+       memcpy(new, pold + d, new_n * size);
+
+       old = kvmalloc_array(old_n, size, GFP_KERNEL);
+       if (!old) {
+               kvfree(new);
+               return -ENOMEM;
+       }
+
+       d = (new_start == old_start) ? new_n * size : 0;
+       memcpy(old, pold + d, old_n * size);
+
+       kvfree(pold);
+       *(void **)ppold = old;
+       *(void **)ppnew = new;
+
+       return 0;
+}
+
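
svm_range_split_array() gives the new range a fresh copy of its slice of the per-page array and shrinks the old range's copy to whichever part it keeps: the front when the new range is the tail, the back when it is the head. A user-space re-implementation of just the copy logic, for illustration (malloc instead of kvmalloc, uint64_t stand-ins for dma addresses):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int split_array(uint64_t **pnew, uint64_t **pold,
		       uint64_t old_start, uint64_t old_n,
		       uint64_t new_start, uint64_t new_n)
{
	uint64_t *src = *pold, *new, *old, d;

	new = malloc(new_n * sizeof(*new));
	old = malloc(old_n * sizeof(*old));
	if (!new || !old) {
		free(new);
		free(old);
		return -1;
	}

	d = new_start - old_start;                /* offset of the new range's slice     */
	memcpy(new, src + d, new_n * sizeof(*new));

	d = (new_start == old_start) ? new_n : 0; /* old keeps the back (head split) or  */
	memcpy(old, src + d, old_n * sizeof(*old)); /* the front (tail split)            */

	free(src);
	*pold = old;
	*pnew = new;
	return 0;
}

int main(void)
{
	uint64_t *pages = malloc(8 * sizeof(*pages)), *tail = NULL;

	assert(pages);
	for (int i = 0; i < 8; i++)
		pages[i] = 0x1000u * i;           /* hypothetical per-page dma addresses */

	/* Split pages [0..7] so the old range keeps [0..4] and the new range takes the tail [5..7]. */
	assert(split_array(&tail, &pages, 0, 5, 5, 3) == 0);
	assert(pages[4] == 0x4000 && tail[0] == 0x5000 && tail[2] == 0x7000);

	free(pages);
	free(tail);
	return 0;
}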
+static int
+svm_range_split_pages(struct svm_range *new, struct svm_range *old,
+                     uint64_t start, uint64_t last)
+{
+       uint64_t npages = last - start + 1;
+       int i, r;
+
+       for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+               r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
+                                         sizeof(*old->dma_addr[i]), old->start,
+                                         npages, new->start, new->npages);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
+static int
+svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
+                     uint64_t start, uint64_t last)
+{
+       uint64_t npages = last - start + 1;
+
+       pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n",
+                new->svms, new, new->start, start, last);
+
+       if (new->start == old->start) {
+               new->offset = old->offset;
+               old->offset += new->npages;
+       } else {
+               new->offset = old->offset + npages;
+       }
+
+       new->svm_bo = svm_range_bo_ref(old->svm_bo);
+       new->ttm_res = old->ttm_res;
+
+       spin_lock(&new->svm_bo->list_lock);
+       list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+       spin_unlock(&new->svm_bo->list_lock);
+
+       return 0;
+}
+
+/**
+ * svm_range_split_adjust - split range and adjust
+ *
+ * @new: new range
+ * @old: the old range
+ * @start: the old range adjust to start address in pages
+ * @last: the old range adjust to last address in pages
+ *
+ * Copy the system memory dma_addr or VRAM ttm_res of the old range into the
+ * new range, covering new->start for new->npages pages; the remaining old
+ * range spans start to last.
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory
+ */
+static int
+svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
+                     uint64_t start, uint64_t last)
+{
+       int r;
+
+       pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n",
+                new->svms, new->start, old->start, old->last, start, last);
+
+       if (new->start < old->start ||
+           new->last > old->last) {
+               WARN_ONCE(1, "invalid new range start or last\n");
+               return -EINVAL;
+       }
+
+       r = svm_range_split_pages(new, old, start, last);
+       if (r)
+               return r;
+
+       if (old->actual_loc && old->ttm_res) {
+               r = svm_range_split_nodes(new, old, start, last);
+               if (r)
+                       return r;
+       }
+
+       old->npages = last - start + 1;
+       old->start = start;
+       old->last = last;
+       new->flags = old->flags;
+       new->preferred_loc = old->preferred_loc;
+       new->prefetch_loc = old->prefetch_loc;
+       new->actual_loc = old->actual_loc;
+       new->granularity = old->granularity;
+       bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+       bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+       return 0;
+}
+
+/**
+ * svm_range_split - split a range in 2 ranges
+ *
+ * @prange: the svm range to split
+ * @start: the remaining range start address in pages
+ * @last: the remaining range last address in pages
+ * @new: the result new range generated
+ *
+ * Two cases only:
+ * case 1: if start == prange->start
+ *         prange ==> prange[start, last]
+ *         new range [last + 1, prange->last]
+ *
+ * case 2: if last == prange->last
+ *         prange ==> prange[start, last]
+ *         new range [prange->start, start - 1]
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last
+ */
+static int
+svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
+               struct svm_range **new)
+{
+       uint64_t old_start = prange->start;
+       uint64_t old_last = prange->last;
+       struct svm_range_list *svms;
+       int r = 0;
+
+       pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms,
+                old_start, old_last, start, last);
+
+       if (old_start != start && old_last != last)
+               return -EINVAL;
+       if (start < old_start || last > old_last)
+               return -EINVAL;
+
+       svms = prange->svms;
+       if (old_start == start)
+               *new = svm_range_new(svms, last + 1, old_last);
+       else
+               *new = svm_range_new(svms, old_start, start - 1);
+       if (!*new)
+               return -ENOMEM;
+
+       r = svm_range_split_adjust(*new, prange, start, last);
+       if (r) {
+               pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
+                        r, old_start, old_last, start, last);
+               svm_range_free(*new);
+               *new = NULL;
+       }
+
+       return r;
+}
+
+static int
+svm_range_split_tail(struct svm_range *prange, struct svm_range *new,
+                    uint64_t new_last, struct list_head *insert_list)
+{
+       struct svm_range *tail;
+       int r = svm_range_split(prange, prange->start, new_last, &tail);
+
+       if (!r)
+               list_add(&tail->insert_list, insert_list);
+       return r;
+}
+
+static int
+svm_range_split_head(struct svm_range *prange, struct svm_range *new,
+                    uint64_t new_start, struct list_head *insert_list)
+{
+       struct svm_range *head;
+       int r = svm_range_split(prange, new_start, prange->last, &head);
+
+       if (!r)
+               list_add(&head->insert_list, insert_list);
+       return r;
+}
+
+static void
+svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+                   struct svm_range *pchild, enum svm_work_list_ops op)
+{
+       pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+                pchild, pchild->start, pchild->last, prange, op);
+
+       pchild->work_item.mm = mm;
+       pchild->work_item.op = op;
+       list_add_tail(&pchild->child_list, &prange->child_list);
+}
+
+/**
+ * svm_range_split_by_granularity - collect ranges within granularity boundary
+ *
+ * @p: the process with svms list
+ * @mm: mm structure
+ * @addr: the vm fault address in pages, to split the prange
+ * @parent: parent range if prange is from child list
+ * @prange: prange to split
+ *
+ * Trims @prange to be a single aligned block of prange->granularity if
+ * possible. The head and tail are added to the child_list in @parent.
+ *
+ * Context: caller must hold mmap_read_lock and prange->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int
+svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+                              unsigned long addr, struct svm_range *parent,
+                              struct svm_range *prange)
+{
+       struct svm_range *head, *tail;
+       unsigned long start, last, size;
+       int r;
+
+       /* Align the split range start and size to the granularity size, so a
+        * single PTE covers the whole range; this reduces the number of PTEs
+        * updated and the L1 TLB space used for translation.
+        */
+        */
+       size = 1UL << prange->granularity;
+       start = ALIGN_DOWN(addr, size);
+       last = ALIGN(addr + 1, size) - 1;
+
+       pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
+                prange->svms, prange->start, prange->last, start, last, size);
+
+       if (start > prange->start) {
+               r = svm_range_split(prange, start, prange->last, &head);
+               if (r)
+                       return r;
+               svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
+       }
+
+       if (last < prange->last) {
+               r = svm_range_split(prange, prange->start, last, &tail);
+               if (r)
+                       return r;
+               svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+       }
+
+       /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+       if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
+               prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+               pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+                        prange, prange->start, prange->last,
+                        SVM_OP_ADD_RANGE_AND_MAP);
+       }
+       return 0;
+}
+
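
A worked example of the alignment math above, using the default granularity of 9 from svm_range_set_default_attributes() (a 512-page block, i.e. 2 MiB with 4 KiB pages). The address and the ALIGN macros are user-space stand-ins for illustration only:

#include <assert.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long granularity = 9;            /* default granularity */
	unsigned long size = 1UL << granularity;  /* 512 pages */
	unsigned long addr = 0x12345;             /* hypothetical fault address, in pages */

	unsigned long start = ALIGN_DOWN(addr, size);
	unsigned long last = ALIGN(addr + 1, size) - 1;

	assert(start == 0x12200 && last == 0x123ff);
	printf("fault page 0x%lx -> aligned block [0x%lx 0x%lx]\n", addr, start, last);
	return 0;
}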
+static uint64_t
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+{
+       struct amdgpu_device *bo_adev;
+       uint32_t flags = prange->flags;
+       uint32_t mapping_flags = 0;
+       uint64_t pte_flags;
+       bool snoop = !prange->ttm_res;
+       bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+
+       if (prange->svm_bo && prange->ttm_res)
+               bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+
+       switch (adev->asic_type) {
+       case CHIP_ARCTURUS:
+               if (prange->svm_bo && prange->ttm_res) {
+                       if (bo_adev == adev) {
+                               mapping_flags |= coherent ?
+                                       AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+                       } else {
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                               if (amdgpu_xgmi_same_hive(adev, bo_adev))
+                                       snoop = true;
+                       }
+               } else {
+                       mapping_flags |= coherent ?
+                               AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               }
+               break;
+       case CHIP_ALDEBARAN:
+               if (prange->svm_bo && prange->ttm_res) {
+                       if (bo_adev == adev) {
+                               mapping_flags |= coherent ?
+                                       AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+                               if (adev->gmc.xgmi.connected_to_cpu)
+                                       snoop = true;
+                       } else {
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                               if (amdgpu_xgmi_same_hive(adev, bo_adev))
+                                       snoop = true;
+                       }
+               } else {
+                       mapping_flags |= coherent ?
+                               AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               }
+               break;
+       default:
+               mapping_flags |= coherent ?
+                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+       }
+
+       mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+       if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+               mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+       if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+               mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+       pte_flags = AMDGPU_PTE_VALID;
+       pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM;
+       pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+
+       pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
+                prange->svms, prange->start, prange->last,
+                prange->ttm_res ? 1:0, pte_flags, mapping_flags);
+
+       return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                        uint64_t start, uint64_t last,
+                        struct dma_fence **fence)
+{
+       uint64_t init_pte_value = 0;
+
+       pr_debug("[0x%llx 0x%llx]\n", start, last);
+
+       return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
+                                          start, last, init_pte_value, 0,
+                                          NULL, NULL, fence);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
+                         unsigned long last)
+{
+       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+       struct kfd_process_device *pdd;
+       struct dma_fence *fence = NULL;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       uint32_t gpuidx;
+       int r = 0;
+
+       bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+                 MAX_GPU_INSTANCE);
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+               pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+                                            start, last, &fence);
+               if (r)
+                       break;
+
+               if (fence) {
+                       r = dma_fence_wait(fence, false);
+                       dma_fence_put(fence);
+                       fence = NULL;
+                       if (r)
+                               break;
+               }
+               amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+                                                 p->pasid);
+       }
+
+       return r;
+}
+
+static int
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                    struct svm_range *prange, dma_addr_t *dma_addr,
+                    struct amdgpu_device *bo_adev, struct dma_fence **fence)
+{
+       struct amdgpu_bo_va bo_va;
+       uint64_t pte_flags;
+       int r = 0;
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+                prange->last);
+
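+       /* A temporary bo_va on the stack is used only to tell the VM code
+        * whether this VRAM mapping goes over XGMI.
+        */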
+       if (prange->svm_bo && prange->ttm_res) {
+               bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
+               prange->mapping.bo_va = &bo_va;
+       }
+
+       prange->mapping.start = prange->start;
+       prange->mapping.last = prange->last;
+       prange->mapping.offset = prange->offset;
+       pte_flags = svm_range_get_pte_flags(adev, prange);
+
+       r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+                                       prange->mapping.start,
+                                       prange->mapping.last, pte_flags,
+                                       prange->mapping.offset,
+                                       prange->ttm_res ?
+                                               prange->ttm_res->mm_node : NULL,
+                                       dma_addr, &vm->last_update);
+       if (r) {
+               pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+               goto out;
+       }
+
+       r = amdgpu_vm_update_pdes(adev, vm, false);
+       if (r) {
+               pr_debug("failed %d to update directories 0x%lx\n", r,
+                        prange->start);
+               goto out;
+       }
+
+       if (fence)
+               *fence = dma_fence_get(vm->last_update);
+
+out:
+       prange->mapping.bo_va = NULL;
+       return r;
+}
+
+static int svm_range_map_to_gpus(struct svm_range *prange,
+                                unsigned long *bitmap, bool wait)
+{
+       struct kfd_process_device *pdd;
+       struct amdgpu_device *bo_adev;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       struct dma_fence *fence = NULL;
+       uint32_t gpuidx;
+       int r = 0;
+
+       if (prange->svm_bo && prange->ttm_res)
+               bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+       else
+               bo_adev = NULL;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+               pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               pdd = kfd_bind_process_to_device(pdd->dev, p);
+               if (IS_ERR(pdd))
+                       return -EINVAL;
+
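+               /* A GPU can only map VRAM it can reach: its own, or a peer's
+                * over XGMI.
+                */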
+               if (bo_adev && adev != bo_adev &&
+                   !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+                       pr_debug("cannot map to device idx %d\n", gpuidx);
+                       continue;
+               }
+
+               r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
+                                        prange, prange->dma_addr[gpuidx],
+                                        bo_adev, wait ? &fence : NULL);
+               if (r)
+                       break;
+
+               if (fence) {
+                       r = dma_fence_wait(fence, false);
+                       dma_fence_put(fence);
+                       fence = NULL;
+                       if (r) {
+                               pr_debug("failed %d to dma fence wait\n", r);
+                               break;
+                       }
+               }
+
+               amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+                                                 p->pasid);
+       }
+
+       return r;
+}
+
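+/* Context used to reserve the page table root BOs of every GPU in the bitmap,
+ * plus the range's SVM BO when the range is backed by VRAM (the extra tv slot
+ * at index MAX_GPU_INSTANCE).
+ */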
+struct svm_validate_context {
+       struct kfd_process *process;
+       struct svm_range *prange;
+       bool intr;
+       unsigned long bitmap[MAX_GPU_INSTANCE];
+       struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
+       struct list_head validate_list;
+       struct ww_acquire_ctx ticket;
+};
+
+static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+{
+       struct kfd_process_device *pdd;
+       struct amdgpu_device *adev;
+       struct amdgpu_vm *vm;
+       uint32_t gpuidx;
+       int r;
+
+       INIT_LIST_HEAD(&ctx->validate_list);
+       for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+               pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+               vm = drm_priv_to_vm(pdd->drm_priv);
+
+               ctx->tv[gpuidx].bo = &vm->root.base.bo->tbo;
+               ctx->tv[gpuidx].num_shared = 4;
+               list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
+       }
+       if (ctx->prange->svm_bo && ctx->prange->ttm_res) {
+               ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo;
+               ctx->tv[MAX_GPU_INSTANCE].num_shared = 1;
+               list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list);
+       }
+
+       r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
+                                  ctx->intr, NULL);
+       if (r) {
+               pr_debug("failed %d to reserve bo\n", r);
+               return r;
+       }
+
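+       /* With everything reserved, validate the page table BOs of each
+        * affected GPU VM so they are resident before mappings are updated.
+        */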
+       for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+               pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       r = -EINVAL;
+                       goto unreserve_out;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv),
+                                             svm_range_bo_validate, NULL);
+               if (r) {
+                       pr_debug("failed %d validate pt bos\n", r);
+                       goto unreserve_out;
+               }
+       }
+
+       return 0;
+
+unreserve_out:
+       ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+       return r;
+}
+
+static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
+{
+       ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+}
+
+/*
+ * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
+ *
+ * To prevent concurrent destruction or change of range attributes, the
+ * svm_read_lock must be held. The caller must not hold the svm_write_lock
+ * because that would block concurrent evictions and lead to deadlocks. To
+ * serialize concurrent migrations or validations of the same range, the
+ * prange->migrate_mutex must be held.
+ *
+ * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
+ * eviction fence).
+ *
+ * The following sequence ensures race-free validation and GPU mapping:
+ *
+ * 1. Reserve page table (and SVM BO if range is in VRAM)
+ * 2. hmm_range_fault to get page addresses (if system memory)
+ * 3. DMA-map pages (if system memory)
+ * 4-a. Take notifier lock
+ * 4-b. Check that pages still valid (mmu_interval_read_retry)
+ * 4-c. Check that the range was not split or otherwise invalidated
+ * 4-d. Update GPU page table
+ * 4-e. Release notifier lock
+ * 5. Release page table (and SVM BO) reservation
+ */
+static int svm_range_validate_and_map(struct mm_struct *mm,
+                                     struct svm_range *prange,
+                                     int32_t gpuidx, bool intr, bool wait)
+{
+       struct svm_validate_context ctx;
+       struct hmm_range *hmm_range;
+       int r = 0;
+
+       ctx.process = container_of(prange->svms, struct kfd_process, svms);
+       ctx.prange = prange;
+       ctx.intr = intr;
+
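+       /* Choose which GPUs to map on: a single GPU for the retry fault path,
+        * only GPUs with access-in-place (plus the GPU currently holding the
+        * range, if it has the ACCESS attribute) when XNACK is enabled, or all
+        * GPUs with access when XNACK is disabled.
+        */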
+       if (gpuidx < MAX_GPU_INSTANCE) {
+               bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE);
+               bitmap_set(ctx.bitmap, gpuidx, 1);
+       } else if (ctx.process->xnack_enabled) {
+               bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+               /* If the range was prefetched to a GPU, or a GPU retry fault
+                * migrated it to a GPU that has the ACCESS attribute for the
+                * range, create the mapping on that GPU as well.
+                */
+               if (prange->actual_loc) {
+                       gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process,
+                                                       prange->actual_loc);
+                       if (gpuidx < 0) {
+                               WARN_ONCE(1, "failed get device by id 0x%x\n",
+                                        prange->actual_loc);
+                               return -EINVAL;
+                       }
+                       if (test_bit(gpuidx, prange->bitmap_access))
+                               bitmap_set(ctx.bitmap, gpuidx, 1);
+               }
+       } else {
+               bitmap_or(ctx.bitmap, prange->bitmap_access,
+                         prange->bitmap_aip, MAX_GPU_INSTANCE);
+       }
+
+       if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
+               return 0;
+
+       if (prange->actual_loc && !prange->ttm_res) {
+               /* This should never happen. actual_loc gets set by
+                * svm_migrate_ram_to_vram after allocating a BO.
+                */
+               WARN(1, "VRAM BO missing during validation\n");
+               return -EINVAL;
+       }
+
+       r = svm_range_reserve_bos(&ctx);
+       if (r)
+               return r;
+
+       if (!prange->actual_loc) {
+               r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+                                              prange->start << PAGE_SHIFT,
+                                              prange->npages, &hmm_range,
+                                              false, true);
+               if (r) {
+                       pr_debug("failed %d to get svm range pages\n", r);
+                       goto unreserve_out;
+               }
+
+               r = svm_range_dma_map(prange, ctx.bitmap,
+                                     hmm_range->hmm_pfns);
+               if (r) {
+                       pr_debug("failed %d to dma map range\n", r);
+                       goto unreserve_out;
+               }
+
+               prange->validated_once = true;
+       }
+
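+       /* Under the range (notifier) lock, re-check that the pages and the
+        * range itself are still valid before updating GPU page tables
+        * (steps 4-a to 4-c above).
+        */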
+       svm_range_lock(prange);
+       if (!prange->actual_loc) {
+               if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+                       pr_debug("hmm update the range, need validate again\n");
+                       r = -EAGAIN;
+                       goto unlock_out;
+               }
+       }
+       if (!list_empty(&prange->child_list)) {
+               pr_debug("range split by unmap in parallel, validate again\n");
+               r = -EAGAIN;
+               goto unlock_out;
+       }
+
+       r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+
+unlock_out:
+       svm_range_unlock(prange);
+unreserve_out:
+       svm_range_unreserve_bos(&ctx);
+
+       if (!r)
+               prange->validate_timestamp = ktime_to_us(ktime_get());
+
+       return r;
+}
+
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with the mmap write lock held and all pending deferred
+ *          work flushed.
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+                                  struct mm_struct *mm)
+{
+retry_flush_work:
+       flush_work(&svms->deferred_list_work);
+       mmap_write_lock(mm);
+
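+       /* New deferred work may have been queued between flush_work and taking
+        * the mmap write lock; if so, drop the lock and flush again.
+        */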
+       if (list_empty(&svms->deferred_range_list))
+               return;
+       mmap_write_unlock(mm);
+       pr_debug("retry flush\n");
+       goto retry_flush_work;
+}
+
+static void svm_range_restore_work(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct amdkfd_process_info *process_info;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       struct kfd_process *p;
+       struct mm_struct *mm;
+       int evicted_ranges;
+       int invalid;
+       int r;
+
+       svms = container_of(dwork, struct svm_range_list, restore_work);
+       evicted_ranges = atomic_read(&svms->evicted_ranges);
+       if (!evicted_ranges)
+               return;
+
+       pr_debug("restore svm ranges\n");
+
+       /* kfd_process_notifier_release destroys this worker thread. So during
+        * the lifetime of this thread, kfd_process and mm will be valid.
+        */
+       p = container_of(svms, struct kfd_process, svms);
+       process_info = p->kgd_process_info;
+       mm = p->mm;
+       if (!mm)
+               return;
+
+       mutex_lock(&process_info->lock);
+       svm_range_list_lock_and_flush_work(svms, mm);
+       mutex_lock(&svms->lock);
+
+       evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+       list_for_each_entry(prange, &svms->list, list) {
+               invalid = atomic_read(&prange->invalid);
+               if (!invalid)
+                       continue;
+
+               pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+                        prange->svms, prange, prange->start, prange->last,
+                        invalid);
+
+               /*
+                * If the range is migrating, wait for the migration to finish.
+                */
+               mutex_lock(&prange->migrate_mutex);
+
+               r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+                                              false, true);
+               if (r)
+                       pr_debug("failed %d to map 0x%lx to gpus\n", r,
+                                prange->start);
+
+               mutex_unlock(&prange->migrate_mutex);
+               if (r)
+                       goto out_reschedule;
+
+               if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+                       goto out_reschedule;
+       }
+
+       if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+           evicted_ranges)
+               goto out_reschedule;
+
+       evicted_ranges = 0;
+
+       r = kgd2kfd_resume_mm(mm);
+       if (r) {
+               /* No recovery from this failure. Probably the CP is
+                * hanging. No point trying again.
+                */
+               pr_debug("failed %d to resume KFD\n", r);
+       }
+
+       pr_debug("restored svm ranges successfully\n");
+
+out_reschedule:
+       mutex_unlock(&svms->lock);
+       mmap_write_unlock(mm);
+       mutex_unlock(&process_info->lock);
+
+       /* If validation failed, reschedule another attempt */
+       if (evicted_ranges) {
+               pr_debug("reschedule to restore svm range\n");
+               schedule_delayed_work(&svms->restore_work,
+                       msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+       }
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Stop all queues of the process to ensure the GPU doesn't access the memory,
+ * then return so the CPU can evict the buffer and update its page tables.
+ *
+ * No lock is needed to synchronize the CPU page table invalidation with GPU
+ * execution. If an invalidation happens while the restore work is running, the
+ * restore work restarts to pick up the latest CPU page mapping for the GPU
+ * before resuming the queues.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+               unsigned long start, unsigned long last)
+{
+       struct svm_range_list *svms = prange->svms;
+       struct kfd_process *p;
+       int r = 0;
+
+       p = container_of(svms, struct kfd_process, svms);
+
+       pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+                svms, prange->start, prange->last, start, last);
+
+       if (!p->xnack_enabled) {
+               int evicted_ranges;
+
+               atomic_inc(&prange->invalid);
+               evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+               if (evicted_ranges != 1)
+                       return r;
+
+               pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+                        prange->svms, prange->start, prange->last);
+
+               /* First eviction, stop the queues */
+               r = kgd2kfd_quiesce_mm(mm);
+               if (r)
+                       pr_debug("failed to quiesce KFD\n");
+
+               pr_debug("schedule to restore svm %p ranges\n", svms);
+               schedule_delayed_work(&svms->restore_work,
+                       msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+       } else {
+               struct svm_range *pchild;
+               unsigned long s, l;
+
+               pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+                        prange->svms, start, last);
+               list_for_each_entry(pchild, &prange->child_list, child_list) {
+                       mutex_lock_nested(&pchild->lock, 1);
+                       s = max(start, pchild->start);
+                       l = min(last, pchild->last);
+                       if (l >= s)
+                               svm_range_unmap_from_gpus(pchild, s, l);
+                       mutex_unlock(&pchild->lock);
+               }
+               s = max(start, prange->start);
+               l = min(last, prange->last);
+               if (l >= s)
+                       svm_range_unmap_from_gpus(prange, s, l);
+       }
+
+       return r;
+}
+
+static struct svm_range *svm_range_clone(struct svm_range *old)
+{
+       struct svm_range *new;
+
+       new = svm_range_new(old->svms, old->start, old->last);
+       if (!new)
+               return NULL;
+
+       if (old->svm_bo) {
+               new->ttm_res = old->ttm_res;
+               new->offset = old->offset;
+               new->svm_bo = svm_range_bo_ref(old->svm_bo);
+               spin_lock(&new->svm_bo->list_lock);
+               list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+               spin_unlock(&new->svm_bo->list_lock);
+       }
+       new->flags = old->flags;
+       new->preferred_loc = old->preferred_loc;
+       new->prefetch_loc = old->prefetch_loc;
+       new->actual_loc = old->actual_loc;
+       new->granularity = old->granularity;
+       bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+       bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+       return new;
+}
+
+/**
+ * svm_range_handle_overlap - split overlapping ranges
+ * @svms: svm range list header
+ * @new: the new range carrying the updated attributes
+ * @start: start address of the added range, in pages
+ * @last: last address of the added range, in pages
+ * @update_list: output, ranges whose attributes are updated. For set_attr
+ *               these are validated and mapped to GPUs; for unmap they are
+ *               removed and unmapped from GPUs.
+ * @insert_list: output, ranges to be inserted into svms with unchanged
+ *               attributes. For set_attr these are added to svms.
+ * @remove_list: output, ranges to be removed from svms
+ * @left: the remaining part of the interval after the last overlap. For
+ *        set_attr this is added as a new range.
+ *
+ * There are five overlap cases in total (see the illustration following this
+ * comment).
+ *
+ * This function handles overlap of an address interval with existing
+ * struct svm_ranges for applying new attributes. This may require
+ * splitting existing struct svm_ranges. All changes should be applied to
+ * the range_list and interval tree transactionally. If any split operation
+ * fails, the entire update fails. Therefore the existing overlapping
+ * svm_ranges are cloned and the original svm_ranges left unchanged. If the
+ * transaction succeeds, the modified clones are added and the originals
+ * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ *
+ * Context: The caller must hold svms->lock
+ */
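+/*
+ * Rough illustration of the overlap cases (E = an existing range, N = the new
+ * [start, last] interval):
+ *
+ *             start                    last
+ *    N:         |------------------------|
+ *    E1:   |-------|           straddles start: split, update the inner part
+ *    E2:            |-----|    fully inside N: update in place
+ *    E3:                   |------|  straddles last: split, update inner part
+ *    E4:   |------------------------------|  straddles both: split head+tail
+ *    gap:           |--|      uncovered pages inside N become new ranges
+ *
+ * Any part of N beyond the last overlapping range is reported back via *left.
+ */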
+static int
+svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+                        unsigned long start, unsigned long last,
+                        struct list_head *update_list,
+                        struct list_head *insert_list,
+                        struct list_head *remove_list,
+                        unsigned long *left)
+{
+       struct interval_tree_node *node;
+       struct svm_range *prange;
+       struct svm_range *tmp;
+       int r = 0;
+
+       INIT_LIST_HEAD(update_list);
+       INIT_LIST_HEAD(insert_list);
+       INIT_LIST_HEAD(remove_list);
+
+       node = interval_tree_iter_first(&svms->objects, start, last);
+       while (node) {
+               struct interval_tree_node *next;
+               struct svm_range *old;
+               unsigned long next_start;
+
+               pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
+                        node->last);
+
+               old = container_of(node, struct svm_range, it_node);
+               next = interval_tree_iter_next(node, start, last);
+               next_start = min(node->last, last) + 1;
+
+               if (node->start < start || node->last > last) {
+                       /* node intersects the updated range, clone+split it */
+                       prange = svm_range_clone(old);
+                       if (!prange) {
+                               r = -ENOMEM;
+                               goto out;
+                       }
+
+                       list_add(&old->remove_list, remove_list);
+                       list_add(&prange->insert_list, insert_list);
+
+                       if (node->start < start) {
+                               pr_debug("change old range start\n");
+                               r = svm_range_split_head(prange, new, start,
+                                                        insert_list);
+                               if (r)
+                                       goto out;
+                       }
+                       if (node->last > last) {
+                               pr_debug("change old range last\n");
+                               r = svm_range_split_tail(prange, new, last,
+                                                        insert_list);
+                               if (r)
+                                       goto out;
+                       }
+               } else {
+                       /* The node is contained within start..last,
+                        * just update it
+                        */
+                       prange = old;
+               }
+
+               if (!svm_range_is_same_attrs(prange, new))
+                       list_add(&prange->update_list, update_list);
+
+               /* insert a new node if needed */
+               if (node->start > start) {
+                       prange = svm_range_new(prange->svms, start,
+                                              node->start - 1);
+                       if (!prange) {
+                               r = -ENOMEM;
+                               goto out;
+                       }
+
+                       list_add(&prange->insert_list, insert_list);
+                       list_add(&prange->update_list, update_list);
+               }
+
+               node = next;
+               start = next_start;
+       }
+
+       if (left && start <= last)
+               *left = last - start + 1;
+
+out:
+       if (r)
+               list_for_each_entry_safe(prange, tmp, insert_list, insert_list)
+                       svm_range_free(prange);
+
+       return r;
+}
+
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+                                           struct svm_range *prange)
+{
+       unsigned long start;
+       unsigned long last;
+
+       start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+       last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+       if (prange->start == start && prange->last == last)
+               return;
+
+       pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+                 prange->svms, prange, start, last, prange->start,
+                 prange->last);
+
+       if (start != 0 && last != 0) {
+               interval_tree_remove(&prange->it_node, &prange->svms->objects);
+               svm_range_remove_notifier(prange);
+       }
+       prange->it_node.start = prange->start;
+       prange->it_node.last = prange->last;
+
+       interval_tree_insert(&prange->it_node, &prange->svms->objects);
+       svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+       struct mm_struct *mm = prange->work_item.mm;
+
+       switch (prange->work_item.op) {
+       case SVM_OP_NULL:
+               pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                        svms, prange, prange->start, prange->last);
+               break;
+       case SVM_OP_UNMAP_RANGE:
+               pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                        svms, prange, prange->start, prange->last);
+               svm_range_unlink(prange);
+               svm_range_remove_notifier(prange);
+               svm_range_free(prange);
+               break;
+       case SVM_OP_UPDATE_RANGE_NOTIFIER:
+               pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                        svms, prange, prange->start, prange->last);
+               svm_range_update_notifier_and_interval_tree(mm, prange);
+               break;
+       case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
+               pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                        svms, prange, prange->start, prange->last);
+               svm_range_update_notifier_and_interval_tree(mm, prange);
+               /* TODO: implement deferred validation and mapping */
+               break;
+       case SVM_OP_ADD_RANGE:
+               pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
+                        prange->start, prange->last);
+               svm_range_add_to_svms(prange);
+               svm_range_add_notifier_locked(mm, prange);
+               break;
+       case SVM_OP_ADD_RANGE_AND_MAP:
+               pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
+                        prange, prange->start, prange->last);
+               svm_range_add_to_svms(prange);
+               svm_range_add_notifier_locked(mm, prange);
+               /* TODO: implement deferred validation and mapping */
+               break;
+       default:
+               WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
+                        prange->work_item.op);
+       }
+}
+
+static void svm_range_drain_retry_fault(struct svm_range_list *svms)
+{
+       struct kfd_process_device *pdd;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       uint32_t i;
+
+       p = container_of(svms, struct kfd_process, svms);
+
+       for (i = 0; i < p->n_pdds; i++) {
+               pdd = p->pdds[i];
+               if (!pdd)
+                       continue;
+
+               pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+               pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
+       }
+}
+
+static void svm_range_deferred_list_work(struct work_struct *work)
+{
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       struct mm_struct *mm;
+
+       svms = container_of(work, struct svm_range_list, deferred_list_work);
+       pr_debug("enter svms 0x%p\n", svms);
+
+       spin_lock(&svms->deferred_list_lock);
+       while (!list_empty(&svms->deferred_range_list)) {
+               prange = list_first_entry(&svms->deferred_range_list,
+                                         struct svm_range, deferred_list);
+               spin_unlock(&svms->deferred_list_lock);
+               pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
+                        prange->start, prange->last, prange->work_item.op);
+
+               /* Make sure no stale retry fault coming after range is freed */
+               if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
+                       svm_range_drain_retry_fault(prange->svms);
+
+               mm = prange->work_item.mm;
+               mmap_write_lock(mm);
+               mutex_lock(&svms->lock);
+
+               /* Removal from deferred_list must happen under the mmap write
+                * lock. Otherwise svm_range_list_lock_and_flush_work could take
+                * the mmap write lock, see an empty deferred_list and continue,
+                * while this handler then blocks waiting for that mmap write
+                * lock.
+                */
+               spin_lock(&svms->deferred_list_lock);
+               list_del_init(&prange->deferred_list);
+               spin_unlock(&svms->deferred_list_lock);
+
+               mutex_lock(&prange->migrate_mutex);
+               while (!list_empty(&prange->child_list)) {
+                       struct svm_range *pchild;
+
+                       pchild = list_first_entry(&prange->child_list,
+                                               struct svm_range, child_list);
+                       pr_debug("child prange 0x%p op %d\n", pchild,
+                                pchild->work_item.op);
+                       list_del_init(&pchild->child_list);
+                       svm_range_handle_list_op(svms, pchild);
+               }
+               mutex_unlock(&prange->migrate_mutex);
+
+               svm_range_handle_list_op(svms, prange);
+               mutex_unlock(&svms->lock);
+               mmap_write_unlock(mm);
+
+               spin_lock(&svms->deferred_list_lock);
+       }
+       spin_unlock(&svms->deferred_list_lock);
+
+       pr_debug("exit svms 0x%p\n", svms);
+}
+
+void
+svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
+                       struct mm_struct *mm, enum svm_work_list_ops op)
+{
+       spin_lock(&svms->deferred_list_lock);
+       /* if prange is on the deferred list */
+       if (!list_empty(&prange->deferred_list)) {
+               pr_debug("update existing prange 0x%p work op %d\n", prange, op);
+               WARN_ONCE(prange->work_item.mm != mm, "mismatched mm\n");
+               if (op != SVM_OP_NULL &&
+                   prange->work_item.op != SVM_OP_UNMAP_RANGE)
+                       prange->work_item.op = op;
+       } else {
+               prange->work_item.op = op;
+               prange->work_item.mm = mm;
+               list_add_tail(&prange->deferred_list,
+                             &prange->svms->deferred_range_list);
+               pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+                        prange, prange->start, prange->last, op);
+       }
+       spin_unlock(&svms->deferred_list_lock);
+}
+
+void schedule_deferred_list_work(struct svm_range_list *svms)
+{
+       spin_lock(&svms->deferred_list_lock);
+       if (!list_empty(&svms->deferred_range_list))
+               schedule_work(&svms->deferred_list_work);
+       spin_unlock(&svms->deferred_list_lock);
+}
+
+static void
+svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
+                     struct svm_range *prange, unsigned long start,
+                     unsigned long last)
+{
+       struct svm_range *head;
+       struct svm_range *tail;
+
+       if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+               pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
+                        prange->start, prange->last);
+               return;
+       }
+       if (start > prange->last || last < prange->start)
+               return;
+
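+       /* Split off the unmapped portion of prange as a child marked
+        * SVM_OP_UNMAP_RANGE; any remaining new piece becomes a child marked
+        * SVM_OP_ADD_RANGE so it is re-inserted later.
+        */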
+       head = tail = prange;
+       if (start > prange->start)
+               svm_range_split(prange, prange->start, start - 1, &tail);
+       if (last < tail->last)
+               svm_range_split(tail, last + 1, tail->last, &head);
+
+       if (head != prange && tail != prange) {
+               svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+               svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+       } else if (tail != prange) {
+               svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+       } else if (head != prange) {
+               svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+       } else if (parent != prange) {
+               prange->work_item.op = SVM_OP_UNMAP_RANGE;
+       }
+}
+
+static void
+svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
+                        unsigned long start, unsigned long last)
+{
+       struct svm_range_list *svms;
+       struct svm_range *pchild;
+       struct kfd_process *p;
+       unsigned long s, l;
+       bool unmap_parent;
+
+       p = kfd_lookup_process_by_mm(mm);
+       if (!p)
+               return;
+       svms = &p->svms;
+
+       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
+                prange, prange->start, prange->last, start, last);
+
+       unmap_parent = start <= prange->start && last >= prange->last;
+
+       list_for_each_entry(pchild, &prange->child_list, child_list) {
+               mutex_lock_nested(&pchild->lock, 1);
+               s = max(start, pchild->start);
+               l = min(last, pchild->last);
+               if (l >= s)
+                       svm_range_unmap_from_gpus(pchild, s, l);
+               svm_range_unmap_split(mm, prange, pchild, start, last);
+               mutex_unlock(&pchild->lock);
+       }
+       s = max(start, prange->start);
+       l = min(last, prange->last);
+       if (l >= s)
+               svm_range_unmap_from_gpus(prange, s, l);
+       svm_range_unmap_split(mm, prange, prange, start, last);
+
+       if (unmap_parent)
+               svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
+       else
+               svm_range_add_list_work(svms, prange, mm,
+                                       SVM_OP_UPDATE_RANGE_NOTIFIER);
+       schedule_deferred_list_work(svms);
+
+       kfd_unref_process(p);
+}
+
+/**
+ * svm_range_cpu_invalidate_pagetables - interval notifier callback
+ *
+ * If the event is MMU_NOTIFY_UNMAP, this callback is from a CPU unmap of the
+ * range; otherwise it is from migration or a CPU page invalidation.
+ *
+ * For an unmap event, unmap the range from GPUs, remove the prange from svms
+ * in a deferred work thread, and split the prange if only part of it is
+ * unmapped.
+ *
+ * For an invalidation event, if GPU retry faults are not enabled, evict the
+ * queues, then schedule svm_range_restore_work to update the GPU mapping and
+ * resume the queues. If GPU retry faults are enabled, unmap the svm range from
+ * the GPU; the retry fault handler will update the GPU mapping to recover.
+ *
+ * Context: mmap lock and notifier_invalidate_start lock are held for the
+ *          invalidation event; the prange lock is held if this is from
+ *          migration.
+ */
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+                                   const struct mmu_notifier_range *range,
+                                   unsigned long cur_seq)
+{
+       struct svm_range *prange;
+       unsigned long start;
+       unsigned long last;
+
+       if (range->event == MMU_NOTIFY_RELEASE)
+               return true;
+
+       start = mni->interval_tree.start;
+       last = mni->interval_tree.last;
+       start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
+       last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+       pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
+                start, last, range->start >> PAGE_SHIFT,
+                (range->end - 1) >> PAGE_SHIFT,
+                mni->interval_tree.start >> PAGE_SHIFT,
+                mni->interval_tree.last >> PAGE_SHIFT, range->event);
+
+       prange = container_of(mni, struct svm_range, notifier);
+
+       svm_range_lock(prange);
+       mmu_interval_set_seq(mni, cur_seq);
+
+       switch (range->event) {
+       case MMU_NOTIFY_UNMAP:
+               svm_range_unmap_from_cpu(mni->mm, prange, start, last);
+               break;
+       default:
+               svm_range_evict(prange, mni->mm, start, last);
+               break;
+       }
+
+       svm_range_unlock(prange);
+
+       return true;
+}
+
+/**
+ * svm_range_from_addr - find svm range from fault address
+ * @svms: svm range list header
+ * @addr: address to search range interval tree, in pages
+ * @parent: parent range if range is on child list
+ *
+ * Context: The caller must hold svms->lock
+ *
+ * Return: the svm_range found or NULL
+ */
+struct svm_range *
+svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
+                   struct svm_range **parent)
+{
+       struct interval_tree_node *node;
+       struct svm_range *prange;
+       struct svm_range *pchild;
+
+       node = interval_tree_iter_first(&svms->objects, addr, addr);
+       if (!node)
+               return NULL;
+
+       prange = container_of(node, struct svm_range, it_node);
+       pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n",
+                addr, prange->start, prange->last, node->start, node->last);
+
+       if (addr >= prange->start && addr <= prange->last) {
+               if (parent)
+                       *parent = prange;
+               return prange;
+       }
+       list_for_each_entry(pchild, &prange->child_list, child_list)
+               if (addr >= pchild->start && addr <= pchild->last) {
+                       pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n",
+                                addr, pchild->start, pchild->last);
+                       if (parent)
+                               *parent = prange;
+                       return pchild;
+               }
+
+       return NULL;
+}
+
+/* svm_range_best_restore_location - decide the best fault restore location
+ * @prange: svm range structure
+ * @adev: the GPU on which the vm fault happened
+ *
+ * This is only called when xnack is on, to decide the best location to restore
+ * the range mapping after a GPU vm fault. The caller migrates the range if the
+ * actual location is not the best location, then updates the GPU page table
+ * mapping to the best location.
+ *
+ * If the faulting GPU is the range's preferred location, best_loc is the
+ * preferred location.
+ * If the faulting GPU is in the range's ACCESSIBLE bitmap, best_loc is the
+ * faulting GPU.
+ * If the faulting GPU is in the range's ACCESSIBLE_IN_PLACE bitmap, then
+ *    if the range's actual location is the CPU, best_loc is the CPU;
+ *    if the faulting GPU is in the same XGMI hive as the actual location GPU,
+ *    best_loc is the range's actual location.
+ * Otherwise the faulting GPU has no access and best_loc is -1.
+ *
+ * Return:
+ * -1 if the faulting GPU has no access
+ * 0 for CPU, otherwise the GPU id of the best restore location
+ */
+static int32_t
+svm_range_best_restore_location(struct svm_range *prange,
+                               struct amdgpu_device *adev,
+                               int32_t *gpuidx)
+{
+       struct amdgpu_device *bo_adev;
+       struct kfd_process *p;
+       uint32_t gpuid;
+       int r;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx);
+       if (r < 0) {
+               pr_debug("failed to get gpuid from kgd\n");
+               return -1;
+       }
+
+       if (prange->preferred_loc == gpuid)
+               return prange->preferred_loc;
+
+       if (test_bit(*gpuidx, prange->bitmap_access))
+               return gpuid;
+
+       if (test_bit(*gpuidx, prange->bitmap_aip)) {
+               if (!prange->actual_loc)
+                       return 0;
+
+               bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+               if (amdgpu_xgmi_same_hive(adev, bo_adev))
+                       return prange->actual_loc;
+               else
+                       return 0;
+       }
+
+       return -1;
+}
+
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+                               unsigned long *start, unsigned long *last)
+{
+       struct vm_area_struct *vma;
+       struct interval_tree_node *node;
+       unsigned long start_limit, end_limit;
+
+       vma = find_vma(p->mm, addr << PAGE_SHIFT);
+       if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+               pr_debug("VMA does not exist at address [0x%llx]\n", addr);
+               return -EFAULT;
+       }
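+       /* Limit the new range to the containing VMA and to the 512-page
+        * (2 MB with 4 KiB pages) aligned block around the fault address.
+        */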
+       start_limit = max(vma->vm_start >> PAGE_SHIFT,
+                     (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+       end_limit = min(vma->vm_end >> PAGE_SHIFT,
+                   (unsigned long)ALIGN(addr + 1, 2UL << 8));
+       /* First range that starts after the fault address */
+       node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
+       if (node) {
+               end_limit = min(end_limit, node->start);
+               /* Last range that ends before the fault address */
+               node = container_of(rb_prev(&node->rb),
+                                   struct interval_tree_node, rb);
+       } else {
+               /* Last range must end before addr because
+                * there was no range after addr
+                */
+               node = container_of(rb_last(&p->svms.objects.rb_root),
+                                   struct interval_tree_node, rb);
+       }
+       if (node) {
+               if (node->last >= addr) {
+                       WARN(1, "Overlap with prev node and page fault addr\n");
+                       return -EFAULT;
+               }
+               start_limit = max(start_limit, node->last + 1);
+       }
+
+       *start = start_limit;
+       *last = end_limit - 1;
+
+       pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
+                 vma->vm_start >> PAGE_SHIFT, *start,
+                 vma->vm_end >> PAGE_SHIFT, *last);
+
+       return 0;
+}
+
+static struct
+svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
+                                               struct kfd_process *p,
+                                               struct mm_struct *mm,
+                                               int64_t addr)
+{
+       struct svm_range *prange = NULL;
+       unsigned long start, last;
+       uint32_t gpuid, gpuidx;
+
+       if (svm_range_get_range_boundaries(p, addr, &start, &last))
+               return NULL;
+
+       prange = svm_range_new(&p->svms, start, last);
+       if (!prange) {
+               pr_debug("Failed to create prange in address [0x%llx]\n", addr);
+               return NULL;
+       }
+       if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) {
+               pr_debug("failed to get gpuid from kgd\n");
+               svm_range_free(prange);
+               return NULL;
+       }
+
+       svm_range_add_to_svms(prange);
+       svm_range_add_notifier_locked(mm, prange);
+
+       return prange;
+}
+
+/* svm_range_skip_recover - decide if prange can be recovered
+ * @prange: svm range structure
+ *
+ * The GPU vm retry fault handler skips recovering the range in these cases:
+ * 1. prange is on the deferred list to be removed after unmap: the fault is
+ *    stale, and the deferred list work will drain it before freeing the prange.
+ * 2. prange is on the deferred list waiting for its interval notifier to be
+ *    added after a split, or
+ * 3. prange is a child range split from a parent prange; recover it later,
+ *    after the interval notifier is added.
+ *
+ * Return: true to skip recover, false to recover
+ */
+static bool svm_range_skip_recover(struct svm_range *prange)
+{
+       struct svm_range_list *svms = prange->svms;
+
+       spin_lock(&svms->deferred_list_lock);
+       if (list_empty(&prange->deferred_list) &&
+           list_empty(&prange->child_list)) {
+               spin_unlock(&svms->deferred_list_lock);
+               return false;
+       }
+       spin_unlock(&svms->deferred_list_lock);
+
+       if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+               pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
+                        svms, prange, prange->start, prange->last);
+               return true;
+       }
+       if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
+           prange->work_item.op == SVM_OP_ADD_RANGE) {
+               pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
+                        svms, prange, prange->start, prange->last);
+               return true;
+       }
+       return false;
+}
+
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+                       uint64_t addr)
+{
+       struct mm_struct *mm = NULL;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       struct kfd_process *p;
+       uint64_t timestamp;
+       int32_t best_loc, gpuidx;
+       bool write_locked = false;
+       int r = 0;
+
+       p = kfd_lookup_process_by_pasid(pasid);
+       if (!p) {
+               pr_debug("kfd process not found, pasid 0x%x\n", pasid);
+               return -ESRCH;
+       }
+       if (!p->xnack_enabled) {
+               pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+               r = -EFAULT;
+               goto out;
+       }
+       svms = &p->svms;
+
+       pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+       mm = get_task_mm(p->lead_thread);
+       if (!mm) {
+               pr_debug("svms 0x%p failed to get mm\n", svms);
+               r = -ESRCH;
+               goto out;
+       }
+
+       mmap_read_lock(mm);
+retry_write_locked:
+       mutex_lock(&svms->lock);
+       prange = svm_range_from_addr(svms, addr, NULL);
+       if (!prange) {
+               pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+                        svms, addr);
+               if (!write_locked) {
+                       /* Need the write lock to create new range with MMU notifier.
+                        * Also flush pending deferred work to make sure the interval
+                        * tree is up to date before we add a new range
+                        */
+                       mutex_unlock(&svms->lock);
+                       mmap_read_unlock(mm);
+                       mmap_write_lock(mm);
+                       write_locked = true;
+                       goto retry_write_locked;
+               }
+               prange = svm_range_create_unregistered_range(adev, p, mm, addr);
+               if (!prange) {
+                       pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
+                                svms, addr);
+                       mmap_write_downgrade(mm);
+                       r = -EFAULT;
+                       goto out_unlock_svms;
+               }
+       }
+       if (write_locked)
+               mmap_write_downgrade(mm);
+
+       mutex_lock(&prange->migrate_mutex);
+
+       if (svm_range_skip_recover(prange)) {
+               amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+               goto out_unlock_range;
+       }
+
+       timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
+       /* skip duplicate vm fault on different pages of same range */
+       if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+               pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+                        svms, prange->start, prange->last);
+               goto out_unlock_range;
+       }
+
+       best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
+       if (best_loc == -1) {
+               pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
+                        svms, prange->start, prange->last);
+               r = -EACCES;
+               goto out_unlock_range;
+       }
+
+       pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
+                svms, prange->start, prange->last, best_loc,
+                prange->actual_loc);
+
+       if (prange->actual_loc != best_loc) {
+               if (best_loc) {
+                       r = svm_migrate_to_vram(prange, best_loc, mm);
+                       if (r) {
+                               pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
+                                        r, addr);
+                               /* Fallback to system memory if migration to
+                                * VRAM failed
+                                */
+                               if (prange->actual_loc)
+                                       r = svm_migrate_vram_to_ram(prange, mm);
+                               else
+                                       r = 0;
+                       }
+               } else {
+                       r = svm_migrate_vram_to_ram(prange, mm);
+               }
+               if (r) {
+                       pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
+                                r, svms, prange->start, prange->last);
+                       goto out_unlock_range;
+               }
+       }
+
+       r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+       if (r)
+               pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
+                        r, svms, prange->start, prange->last);
+
+out_unlock_range:
+       mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
+       mutex_unlock(&svms->lock);
+       mmap_read_unlock(mm);
+       mmput(mm);
+out:
+       kfd_unref_process(p);
+
+       if (r == -EAGAIN) {
+               pr_debug("recover vm fault later\n");
+               amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+               r = 0;
+       }
+       return r;
+}
+
+void svm_range_list_fini(struct kfd_process *p)
+{
+       struct svm_range *prange;
+       struct svm_range *next;
+
+       pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+
+       /* Ensure list work is finished before process is destroyed */
+       flush_work(&p->svms.deferred_list_work);
+
+       list_for_each_entry_safe(prange, next, &p->svms.list, list) {
+               svm_range_unlink(prange);
+               svm_range_remove_notifier(prange);
+               svm_range_free(prange);
+       }
+
+       mutex_destroy(&p->svms.lock);
+
+       pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+}
+
+int svm_range_list_init(struct kfd_process *p)
+{
+       struct svm_range_list *svms = &p->svms;
+
+       svms->objects = RB_ROOT_CACHED;
+       mutex_init(&svms->lock);
+       INIT_LIST_HEAD(&svms->list);
+       atomic_set(&svms->evicted_ranges, 0);
+       INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
+       INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
+       INIT_LIST_HEAD(&svms->deferred_range_list);
+       spin_lock_init(&svms->deferred_list_lock);
+
+       return 0;
+}
+
+/**
+ * svm_range_is_valid - check if virtual address range is valid
+ * @mm: current process mm_struct
+ * @start: range start address, in pages
+ * @size: range size, in pages
+ *
+ * Valid virtual address range means it belongs to one or more VMAs
+ *
+ * Context: Process context
+ *
+ * Return:
+ *  true - valid svm range
+ *  false - invalid svm range
+ */
+static bool
+svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
+{
+       const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
+       struct vm_area_struct *vma;
+       unsigned long end;
+
+       start <<= PAGE_SHIFT;
+       end = start + (size << PAGE_SHIFT);
+
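+       /* Walk the VMAs covering [start, end) and reject gaps and device
+        * (VM_IO/VM_PFNMAP/VM_MIXEDMAP) mappings.
+        */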
+       do {
+               vma = find_vma(mm, start);
+               if (!vma || start < vma->vm_start ||
+                   (vma->vm_flags & device_vma))
+                       return false;
+               start = min(end, vma->vm_end);
+       } while (start < end);
+
+       return true;
+}
+
+/**
+ * svm_range_add - add svm range and handle overlap
+ * @p: the process to add this range to
+ * @start: range start address, in pages
+ * @size: range size, in pages
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges need validate and update GPU mapping
+ * @insert_list: output, the ranges need insert to svms
+ * @remove_list: output, the ranges are replaced and need remove from svms
+ *
+ * Check if the virtual address range overlaps registered ranges; split the
+ * overlapping ranges and copy/adjust page addresses and vram nodes between the
+ * old and new ranges.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+             uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+             struct list_head *update_list, struct list_head *insert_list,
+             struct list_head *remove_list)
+{
+       uint64_t last = start + size - 1UL;
+       struct svm_range_list *svms;
+       struct svm_range new = {0};
+       struct svm_range *prange;
+       unsigned long left = 0;
+       int r = 0;
+
+       pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last);
+
+       svm_range_apply_attrs(p, &new, nattr, attrs);
+
+       svms = &p->svms;
+
+       r = svm_range_handle_overlap(svms, &new, start, last, update_list,
+                                    insert_list, remove_list, &left);
+       if (r)
+               return r;
+
+       if (left) {
+               prange = svm_range_new(svms, last - left + 1, last);
+               if (!prange)
+                       return -ENOMEM;
+               list_add(&prange->insert_list, insert_list);
+               list_add(&prange->update_list, update_list);
+       }
+
+       return 0;
+}
+
+/* svm_range_best_prefetch_location - decide the best prefetch location
+ * @prange: svm range structure
+ *
+ * For xnack off:
+ * If the range maps to a single GPU, the best actual location is the prefetch
+ * location, which can be the CPU or a GPU.
+ *
+ * If the range maps to multiple GPUs, the best actual location can only be the
+ * prefetch_loc GPU when all of those GPUs are in the same XGMI hive. If the
+ * GPUs are connected over PCIe, the best actual location is always the CPU,
+ * because a GPU cannot access another GPU's vram, assuming PCIe small BAR
+ * (large BAR support is not upstream).
+ *
+ * For xnack on:
+ * The best actual location is the prefetch location. If the GPUs are in the
+ * same XGMI hive, the range maps to multiple GPUs; otherwise it maps only to
+ * the actual location GPU, and vm faults from other GPUs trigger migration.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 for CPU, otherwise the GPU id of the best prefetch location
+ */
+static uint32_t
+svm_range_best_prefetch_location(struct svm_range *prange)
+{
+       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+       uint32_t best_loc = prange->prefetch_loc;
+       struct kfd_process_device *pdd;
+       struct amdgpu_device *bo_adev;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       uint32_t gpuidx;
+
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       /* xnack on */
+       if (p->xnack_enabled)
+               goto out;
+
+       /* xnack off */
+       if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
+               goto out;
+
+       bo_adev = svm_range_get_adev_by_id(prange, best_loc);
+       if (!bo_adev) {
+               WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc);
+               best_loc = 0;
+               goto out;
+       }
+       bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+                 MAX_GPU_INSTANCE);
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+               if (!pdd) {
+                       pr_debug("failed to get device by idx 0x%x\n", gpuidx);
+                       continue;
+               }
+               adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+               if (adev == bo_adev)
+                       continue;
+
+               if (!amdgpu_xgmi_same_hive(adev, bo_adev)) {
+                       best_loc = 0;
+                       break;
+               }
+       }
+
+out:
+       pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n",
+                p->xnack_enabled, &p->svms, prange->start, prange->last,
+                best_loc);
+
+       return best_loc;
+}
+
+/* FIXME: This is a workaround for a page locking bug when some pages are
+ * invalid during migration to VRAM
+ */
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
+{
+       struct hmm_range *hmm_range;
+       int r;
+
+       if (prange->validated_once)
+               return;
+
+       r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+                                      prange->start << PAGE_SHIFT,
+                                      prange->npages, &hmm_range,
+                                      false, true);
+       if (!r) {
+               amdgpu_hmm_range_get_pages_done(hmm_range);
+               prange->validated_once = true;
+       }
+}
+
+/* svm_range_trigger_migration - start page migration if prefetch loc changed
+ * @mm: current process mm_struct
+ * @prange: svm range structure
+ * @migrated: output, true if migration is triggered
+ *
+ * If the range prefetch_loc is a GPU and the actual loc is CPU (0), migrate the
+ * range from ram to vram.
+ * If the range prefetch_loc is CPU (0) and the actual loc is a GPU, migrate the
+ * range from vram to ram.
+ *
+ * If GPU vm fault retry is not enabled, migration interacts with the MMU
+ * notifier and restore work:
+ * 1. migrate_vma_setup invalidates pages; the MMU notifier callback
+ *    svm_range_evict stops all queues and schedules restore work
+ * 2. svm_range_restore_work waits until migration is done:
+ *    a. svm_range_validate_vram takes prange->migrate_mutex
+ *    b. svm_range_validate_ram HMM get pages waits for the CPU fault handler
+ *       to return
+ * 3. restore work updates the GPU mappings and resumes all queues.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 - OK, otherwise - error code of migration
+ */
+static int
+svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
+                           bool *migrated)
+{
+       uint32_t best_loc;
+       int r = 0;
+
+       *migrated = false;
+       best_loc = svm_range_best_prefetch_location(prange);
+
+       if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+           best_loc == prange->actual_loc)
+               return 0;
+
+       /*
+        * Prefetch to GPU without the host access flag: set actual_loc to the
+        * gpu; validation on the gpu and mapping to gpus are handled afterwards.
+        */
+       if (best_loc && !prange->actual_loc &&
+           !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
+               prange->actual_loc = best_loc;
+               return 0;
+       }
+
+       if (!best_loc) {
+               r = svm_migrate_vram_to_ram(prange, mm);
+               *migrated = !r;
+               return r;
+       }
+
+       r = svm_migrate_to_vram(prange, best_loc, mm);
+       *migrated = !r;
+
+       return r;
+}
+
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
+{
+       if (!fence)
+               return -EINVAL;
+
+       if (dma_fence_is_signaled(&fence->base))
+               return 0;
+
+       if (fence->svm_bo) {
+               WRITE_ONCE(fence->svm_bo->evicting, 1);
+               schedule_work(&fence->svm_bo->eviction_work);
+       }
+
+       return 0;
+}
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work)
+{
+       struct svm_range_bo *svm_bo;
+       struct kfd_process *p;
+       struct mm_struct *mm;
+
+       svm_bo = container_of(work, struct svm_range_bo, eviction_work);
+       if (!svm_bo_ref_unless_zero(svm_bo))
+               return; /* svm_bo was freed while eviction was pending */
+
+       /* svm_range_bo_release destroys this worker thread. So during
+        * the lifetime of this thread, kfd_process and mm will be valid.
+        */
+       p = container_of(svm_bo->svms, struct kfd_process, svms);
+       mm = p->mm;
+       if (!mm)
+               return;
+
+       mmap_read_lock(mm);
+       spin_lock(&svm_bo->list_lock);
+       while (!list_empty(&svm_bo->range_list)) {
+               struct svm_range *prange =
+                               list_first_entry(&svm_bo->range_list,
+                                               struct svm_range, svm_bo_list);
+               list_del_init(&prange->svm_bo_list);
+               spin_unlock(&svm_bo->list_lock);
+
+               pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                        prange->start, prange->last);
+
+               mutex_lock(&prange->migrate_mutex);
+               svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+
+               mutex_lock(&prange->lock);
+               prange->svm_bo = NULL;
+               mutex_unlock(&prange->lock);
+
+               mutex_unlock(&prange->migrate_mutex);
+
+               spin_lock(&svm_bo->list_lock);
+       }
+       spin_unlock(&svm_bo->list_lock);
+       mmap_read_unlock(mm);
+
+       dma_fence_signal(&svm_bo->eviction_fence->base);
+       /* This is the last reference to svm_bo, after svm_range_vram_node_free
+        * has been called in svm_migrate_vram_to_ram
+        */
+       WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+       svm_range_bo_unref(svm_bo);
+}
+
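+/* Lock ordering in svm_range_set_attr below: process_info->lock, then the mmap
+ * write lock (taken via svm_range_list_lock_and_flush_work and later downgraded
+ * to a read lock), then svms->lock, and finally the per-range migrate_mutex
+ * around migration and validation.
+ */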
+static int
+svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+                  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+       struct amdkfd_process_info *process_info = p->kgd_process_info;
+       struct mm_struct *mm = current->mm;
+       struct list_head update_list;
+       struct list_head insert_list;
+       struct list_head remove_list;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       struct svm_range *next;
+       int r = 0;
+
+       pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+                p->pasid, &p->svms, start, start + size - 1, size);
+
+       r = svm_range_check_attr(p, nattr, attrs);
+       if (r)
+               return r;
+
+       svms = &p->svms;
+
+       mutex_lock(&process_info->lock);
+
+       svm_range_list_lock_and_flush_work(svms, mm);
+
+       if (!svm_range_is_valid(mm, start, size)) {
+               pr_debug("invalid range\n");
+               r = -EFAULT;
+               mmap_write_unlock(mm);
+               goto out;
+       }
+
+       mutex_lock(&svms->lock);
+
+       /* Add new range and split existing ranges as needed */
+       r = svm_range_add(p, start, size, nattr, attrs, &update_list,
+                         &insert_list, &remove_list);
+       if (r) {
+               mutex_unlock(&svms->lock);
+               mmap_write_unlock(mm);
+               goto out;
+       }
+       /* Apply changes as a transaction */
+       list_for_each_entry_safe(prange, next, &insert_list, insert_list) {
+               svm_range_add_to_svms(prange);
+               svm_range_add_notifier_locked(mm, prange);
+       }
+       list_for_each_entry(prange, &update_list, update_list) {
+               svm_range_apply_attrs(p, prange, nattr, attrs);
+               /* TODO: unmap ranges from GPU that lost access */
+       }
+       list_for_each_entry_safe(prange, next, &remove_list,
+                               remove_list) {
+               pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+                        prange->svms, prange, prange->start,
+                        prange->last);
+               svm_range_unlink(prange);
+               svm_range_remove_notifier(prange);
+               svm_range_free(prange);
+       }
+
+       mmap_write_downgrade(mm);
+       /* Trigger migrations and revalidate and map to GPUs as needed. If
+        * this fails we may be left with partially completed actions. There
+        * is no clean way of rolling back to the previous state in such a
+        * case because the rollback wouldn't be guaranteed to work either.
+        */
+       list_for_each_entry(prange, &update_list, update_list) {
+               bool migrated;
+
+               mutex_lock(&prange->migrate_mutex);
+
+               r = svm_range_trigger_migration(mm, prange, &migrated);
+               if (r)
+                       goto out_unlock_range;
+
+               if (migrated && !p->xnack_enabled) {
+                       pr_debug("restore_work will update mappings of GPUs\n");
+                       mutex_unlock(&prange->migrate_mutex);
+                       continue;
+               }
+
+               r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+                                              true, true);
+               if (r)
+                       pr_debug("failed %d to map svm range\n", r);
+
+out_unlock_range:
+               mutex_unlock(&prange->migrate_mutex);
+               if (r)
+                       break;
+       }
+
+       svm_range_debug_dump(svms);
+
+       mutex_unlock(&svms->lock);
+       mmap_read_unlock(mm);
+out:
+       mutex_unlock(&process_info->lock);
+
+       pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
+                &p->svms, start, start + size - 1, r);
+
+       return r;
+}
+
+static int
+svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+                  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+       DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+       DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+       bool get_preferred_loc = false;
+       bool get_prefetch_loc = false;
+       bool get_granularity = false;
+       bool get_accessible = false;
+       bool get_flags = false;
+       uint64_t last = start + size - 1UL;
+       struct mm_struct *mm = current->mm;
+       uint8_t granularity = 0xff;
+       struct interval_tree_node *node;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+       uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+       uint32_t flags = 0xffffffff;
+       int gpuidx;
+       uint32_t i;
+
+       pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
+                start + size - 1, nattr);
+
+       mmap_read_lock(mm);
+       if (!svm_range_is_valid(mm, start, size)) {
+               pr_debug("invalid range\n");
+               mmap_read_unlock(mm);
+               return -EINVAL;
+       }
+       mmap_read_unlock(mm);
+
+       for (i = 0; i < nattr; i++) {
+               switch (attrs[i].type) {
+               case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+                       get_preferred_loc = true;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+                       get_prefetch_loc = true;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_ACCESS:
+                       get_accessible = true;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+                       get_flags = true;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+                       get_granularity = true;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+               case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+               case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+                       fallthrough;
+               default:
+                       pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+                       return -EINVAL;
+               }
+       }
+
+       svms = &p->svms;
+
+       mutex_lock(&svms->lock);
+
+       node = interval_tree_iter_first(&svms->objects, start, last);
+       if (!node) {
+               pr_debug("range attrs not found return default values\n");
+               svm_range_set_default_attributes(&location, &prefetch_loc,
+                                                &granularity, &flags);
+               if (p->xnack_enabled)
+                       bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+               else
+                       bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
+               bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
+               goto fill_values;
+       }
+       bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+       bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
+
+       while (node) {
+               struct interval_tree_node *next;
+
+               prange = container_of(node, struct svm_range, it_node);
+               next = interval_tree_iter_next(node, start, last);
+
+               if (get_preferred_loc) {
+                       if (prange->preferred_loc ==
+                                       KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+                           (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+                            location != prange->preferred_loc)) {
+                               location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+                               get_preferred_loc = false;
+                       } else {
+                               location = prange->preferred_loc;
+                       }
+               }
+               if (get_prefetch_loc) {
+                       if (prange->prefetch_loc ==
+                                       KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+                           (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+                            prefetch_loc != prange->prefetch_loc)) {
+                               prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+                               get_prefetch_loc = false;
+                       } else {
+                               prefetch_loc = prange->prefetch_loc;
+                       }
+               }
+               if (get_accessible) {
+                       bitmap_and(bitmap_access, bitmap_access,
+                                  prange->bitmap_access, MAX_GPU_INSTANCE);
+                       bitmap_and(bitmap_aip, bitmap_aip,
+                                  prange->bitmap_aip, MAX_GPU_INSTANCE);
+               }
+               if (get_flags)
+                       flags &= prange->flags;
+
+               if (get_granularity && prange->granularity < granularity)
+                       granularity = prange->granularity;
+
+               node = next;
+       }
+fill_values:
+       mutex_unlock(&svms->lock);
+
+       for (i = 0; i < nattr; i++) {
+               switch (attrs[i].type) {
+               case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+                       attrs[i].value = location;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+                       attrs[i].value = prefetch_loc;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_ACCESS:
+                       gpuidx = kfd_process_gpuidx_from_gpuid(p,
+                                                              attrs[i].value);
+                       if (gpuidx < 0) {
+                               pr_debug("invalid gpuid %x\n", attrs[i].value);
+                               return -EINVAL;
+                       }
+                       if (test_bit(gpuidx, bitmap_access))
+                               attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
+                       else if (test_bit(gpuidx, bitmap_aip))
+                               attrs[i].type =
+                                       KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
+                       else
+                               attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+                       attrs[i].value = flags;
+                       break;
+               case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+                       attrs[i].value = (uint32_t)granularity;
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+int
+svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+         uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
+{
+       int r;
+
+       start >>= PAGE_SHIFT;
+       size >>= PAGE_SHIFT;
+
+       switch (op) {
+       case KFD_IOCTL_SVM_OP_SET_ATTR:
+               r = svm_range_set_attr(p, start, size, nattrs, attrs);
+               break;
+       case KFD_IOCTL_SVM_OP_GET_ATTR:
+               r = svm_range_get_attr(p, start, size, nattrs, attrs);
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+
+       return r;
+}
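svm_ioctl above converts the byte-based start and size into pages before dispatching to the set/get attribute handlers, so userspace passes page-aligned byte addresses and sizes. A minimal userspace sketch of setting a prefetch location, assuming the proposed kfd_ioctl.h additions for this series (struct kfd_ioctl_svm_args with start_addr/size/op/nattr fields followed by a flexible array of struct kfd_ioctl_svm_attribute, and the AMDKFD_IOC_SVM ioctl on /dev/kfd):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>	/* assumed to carry the SVM UAPI named above */

/* Ask KFD to prefetch [addr, addr + size) to gpu_id and grant it access.
 * kfd_fd is an open file descriptor for /dev/kfd; addr and size must be
 * page aligned. Sketch only, minimal error handling.
 */
static int svm_prefetch_to_gpu(int kfd_fd, void *addr, uint64_t size,
			       uint32_t gpu_id)
{
	struct kfd_ioctl_svm_args *args;
	uint32_t nattr = 2;
	int r;

	args = calloc(1, sizeof(*args) + nattr * sizeof(args->attrs[0]));
	if (!args)
		return -1;

	args->start_addr = (uint64_t)(uintptr_t)addr;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = nattr;
	args->attrs[0].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
	args->attrs[0].value = gpu_id;
	args->attrs[1].type = KFD_IOCTL_SVM_ATTR_ACCESS;
	args->attrs[1].value = gpu_id;

	r = ioctl(kfd_fd, AMDKFD_IOC_SVM, args);
	free(args);
	return r;
}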
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
new file mode 100644 (file)
index 0000000..573f984
--- /dev/null
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_SVM_H_
+#define KFD_SVM_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "amdgpu.h"
+#include "kfd_priv.h"
+
+struct svm_range_bo {
+       struct amdgpu_bo                *bo;
+       struct kref                     kref;
+       struct list_head                range_list; /* all svm ranges sharing this bo */
+       spinlock_t                      list_lock;
+       struct amdgpu_amdkfd_fence      *eviction_fence;
+       struct work_struct              eviction_work;
+       struct svm_range_list           *svms;
+       uint32_t                        evicting;
+};
+
+enum svm_work_list_ops {
+       SVM_OP_NULL,
+       SVM_OP_UNMAP_RANGE,
+       SVM_OP_UPDATE_RANGE_NOTIFIER,
+       SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
+       SVM_OP_ADD_RANGE,
+       SVM_OP_ADD_RANGE_AND_MAP
+};
+
+struct svm_work_list_item {
+       enum svm_work_list_ops op;
+       struct mm_struct *mm;
+};
+
+/**
+ * struct svm_range - shared virtual memory range
+ *
+ * @svms:       list of svm ranges, structure defined in kfd_process
+ * @migrate_mutex: to serialize range migration, validation and mapping update
+ * @start:      range start address in pages
+ * @last:       range last address in pages
+ * @it_node:    node [start, last] stored in the interval tree; start and last
+ *              are in pages, so the range size in pages is (last - start + 1)
+ * @list:       link list node, used to scan all ranges of svms
+ * @update_list:link list node used to add to update_list
+ * @remove_list:link list node used to add to remove list
+ * @insert_list:link list node used to add to insert list
+ * @mapping:    bo_va mapping structure to create and update GPU page table
+ * @npages:     number of pages
+ * @dma_addr:   dma mapping address on each GPU for system memory physical page
+ * @ttm_res:    vram ttm resource map
+ * @offset:     range start offset within mm_nodes
+ * @svm_bo:     struct to manage the split amdgpu_bo
+ * @svm_bo_list:link list node, to scan all ranges which share same svm_bo
+ * @lock:       protect prange start, last, child_list, svm_bo_list
+ * @saved_flags:save/restore current PF_MEMALLOC flags
+ * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
+ * @preferred_loc: preferred location, 0 for CPU, or GPU id
+ * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
+ * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @granularity:migration granularity, log2 num pages
+ * @invalid:    non-zero means the cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
+ * @notifier:   registered mmu interval notifier
+ * @work_item:  deferred work item information
+ * @deferred_list: list header used to add range to deferred list
+ * @child_list: list header for split ranges which are not added to svms yet
+ * @bitmap_access: index bitmap of GPUs which can access the range
+ * @bitmap_aip: index bitmap of GPUs which can access the range in place
+ *
+ * Data structure for a virtual memory range shared by CPU and GPUs. It can be
+ * allocated from system memory (ram) or device memory (vram), and migrated
+ * from ram to vram or from vram to ram.
+ */
+struct svm_range {
+       struct svm_range_list           *svms;
+       struct mutex                    migrate_mutex;
+       unsigned long                   start;
+       unsigned long                   last;
+       struct interval_tree_node       it_node;
+       struct list_head                list;
+       struct list_head                update_list;
+       struct list_head                remove_list;
+       struct list_head                insert_list;
+       struct amdgpu_bo_va_mapping     mapping;
+       uint64_t                        npages;
+       dma_addr_t                      *dma_addr[MAX_GPU_INSTANCE];
+       struct ttm_resource             *ttm_res;
+       uint64_t                        offset;
+       struct svm_range_bo             *svm_bo;
+       struct list_head                svm_bo_list;
+       struct mutex                    lock;
+       unsigned int                    saved_flags;
+       uint32_t                        flags;
+       uint32_t                        preferred_loc;
+       uint32_t                        prefetch_loc;
+       uint32_t                        actual_loc;
+       uint8_t                         granularity;
+       atomic_t                        invalid;
+       uint64_t                        validate_timestamp;
+       struct mmu_interval_notifier    notifier;
+       struct svm_work_list_item       work_item;
+       struct list_head                deferred_list;
+       struct list_head                child_list;
+       DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+       DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+       bool                            validated_once;
+};
+
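+/* svm_range_lock/unlock pair mutex_lock/unlock on prange->lock with
+ * memalloc_noreclaim_save/restore (PF_MEMALLOC), so that memory allocations
+ * made while the lock is held do not enter direct reclaim.
+ */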
+static inline void svm_range_lock(struct svm_range *prange)
+{
+       mutex_lock(&prange->lock);
+       prange->saved_flags = memalloc_noreclaim_save();
+}
+static inline void svm_range_unlock(struct svm_range *prange)
+{
+       memalloc_noreclaim_restore(prange->saved_flags);
+       mutex_unlock(&prange->lock);
+}
+
+int svm_range_list_init(struct kfd_process *p);
+void svm_range_list_fini(struct kfd_process *p);
+int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+             uint64_t size, uint32_t nattrs,
+             struct kfd_ioctl_svm_attribute *attrs);
+struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
+                                     unsigned long addr,
+                                     struct svm_range **parent);
+struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
+                                              uint32_t id);
+int svm_range_vram_node_new(struct amdgpu_device *adev,
+                           struct svm_range *prange, bool clear);
+void svm_range_vram_node_free(struct svm_range *prange);
+int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+                              unsigned long addr, struct svm_range *parent,
+                              struct svm_range *prange);
+int svm_range_restore_pages(struct amdgpu_device *adev,
+                           unsigned int pasid, uint64_t addr);
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
+void svm_range_add_list_work(struct svm_range_list *svms,
+                            struct svm_range *prange, struct mm_struct *mm,
+                            enum svm_work_list_ops op);
+void schedule_deferred_list_work(struct svm_range_list *svms);
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+                        unsigned long offset, unsigned long npages);
+void svm_range_free_dma_mappings(struct svm_range *prange);
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
+
+#else
+
+struct kfd_process;
+
+static inline int svm_range_list_init(struct kfd_process *p)
+{
+       return 0;
+}
+static inline void svm_range_list_fini(struct kfd_process *p)
+{
+       /* empty */
+}
+
+static inline int svm_range_restore_pages(struct amdgpu_device *adev,
+                                         unsigned int pasid, uint64_t addr)
+{
+       return -EFAULT;
+}
+
+static inline int svm_range_schedule_evict_svm_bo(
+               struct amdgpu_amdkfd_fence *fence)
+{
+       WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
+       return -EINVAL;
+}
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_SVM_H_ */
index cdef608db4f42d615e419cc4e52c28bf2457d656..7fae6a7e51f5b19f9b0907f43478dad7cee5d7c3 100644 (file)
@@ -1192,40 +1192,60 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
                mem->mem_clk_max = local_mem_info.mem_clk_max;
 }
 
-static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
+                                       struct kfd_topology_device *target_gpu_dev,
+                                       struct kfd_iolink_properties *link)
 {
-       struct kfd_iolink_properties *link, *cpu_link;
-       struct kfd_topology_device *cpu_dev;
-       uint32_t cap;
-       uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
-       uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
-
-       if (!dev || !dev->gpu)
+       /* xgmi always supports atomics between links. */
+       if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
                return;
 
-       pcie_capability_read_dword(dev->gpu->pdev,
-                       PCI_EXP_DEVCAP2, &cap);
+       /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
+       if (target_gpu_dev) {
+               uint32_t cap;
 
-       if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
-                    PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
-               cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
-                       CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+               pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
+                               PCI_EXP_DEVCAP2, &cap);
+
+               if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+                            PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+                       link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+                               CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+       /* set gpu (dev) flags. */
+       } else {
+               if (!dev->gpu->pci_atomic_requested ||
+                               dev->gpu->device_info->asic_family ==
+                                                       CHIP_HAWAII)
+                       link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+                               CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+       }
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+       struct kfd_iolink_properties *link, *inbound_link;
+       struct kfd_topology_device *peer_dev;
 
-       if (!dev->gpu->pci_atomic_requested ||
-           dev->gpu->device_info->asic_family == CHIP_HAWAII)
-               flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
-                       CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+       if (!dev || !dev->gpu)
+               return;
 
        /* GPU only creates direct links so apply flags setting to all */
        list_for_each_entry(link, &dev->io_link_props, list) {
-               link->flags = flag;
-               cpu_dev = kfd_topology_device_by_proximity_domain(
+               link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+               kfd_set_iolink_no_atomics(dev, NULL, link);
+               peer_dev = kfd_topology_device_by_proximity_domain(
                                link->node_to);
-               if (cpu_dev) {
-                       list_for_each_entry(cpu_link,
-                                           &cpu_dev->io_link_props, list)
-                               if (cpu_link->node_to == link->node_from)
-                                       cpu_link->flags = cpu_flag;
+
+               if (!peer_dev)
+                       continue;
+
+               list_for_each_entry(inbound_link, &peer_dev->io_link_props,
+                                                                       list) {
+                       if (inbound_link->node_to != link->node_from)
+                               continue;
+
+                       inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+                       kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
                }
        }
 }
@@ -1410,15 +1430,21 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
        adev = (struct amdgpu_device *)(dev->gpu->kgd);
        /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
        dev->node_props.capability |=
-               ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+               ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
                HSA_CAP_SRAM_EDCSUPPORTED : 0;
-       dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+       dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
                HSA_CAP_MEM_EDCSUPPORTED : 0;
 
        if (adev->asic_type != CHIP_VEGA10)
-               dev->node_props.capability |= (adev->ras_features != 0) ?
+               dev->node_props.capability |= (adev->ras_enabled != 0) ?
                        HSA_CAP_RASEVENTNOTIFY : 0;
 
+       /* SVM API and HMM page migration work together; the device memory type
+        * is set to a non-zero value when page migration registers device
+        * memory.
+        */
+       if (adev->kfd.dev->pgmap.type != 0)
+               dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
        kfd_debug_print_topology();
 
        if (!res)
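With HSA_CAP_SVMAPI_SUPPORTED now set in the node capability mask whenever page migration has registered device memory, userspace can probe SVM support from the KFD topology. A rough sketch, assuming the usual sysfs layout where /sys/class/kfd/kfd/topology/nodes/<N>/properties holds one "name value" pair per line:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000

/* Returns true if the topology node's properties file advertises SVM support.
 * path is e.g. "/sys/class/kfd/kfd/topology/nodes/1/properties" (assumed layout).
 */
static bool kfd_node_supports_svm(const char *path)
{
	char name[64];
	unsigned long long value;
	bool supported = false;
	FILE *f = fopen(path, "r");

	if (!f)
		return false;

	while (fscanf(f, "%63s %llu", name, &value) == 2) {
		if (!strcmp(name, "capability")) {
			supported = (value & HSA_CAP_SVMAPI_SUPPORTED) != 0;
			break;
		}
	}
	fclose(f);
	return supported;
}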
index b8b68087bd7a3085120b98f98bad335312eb6a9f..6bd6380b0ee08961606d4ac7d117e761c840ad12 100644 (file)
@@ -53,8 +53,9 @@
 #define HSA_CAP_ASIC_REVISION_MASK             0x03c00000
 #define HSA_CAP_ASIC_REVISION_SHIFT            22
 #define HSA_CAP_SRAM_EDCSUPPORTED              0x04000000
+#define HSA_CAP_SVMAPI_SUPPORTED               0x08000000
 
-#define HSA_CAP_RESERVED                       0xf80f8000
+#define HSA_CAP_RESERVED                       0xf00f8000
 
 struct kfd_node_properties {
        uint64_t hive_id;
@@ -98,9 +99,10 @@ struct kfd_node_properties {
 #define HSA_MEM_HEAP_TYPE_GPU_LDS      4
 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH  5
 
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE    0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE     0x00000002
-#define HSA_MEM_FLAGS_RESERVED         0xfffffffc
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE            0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE             0x00000002
+#define HSA_MEM_FLAGS_COHERENTHOSTACCESS       0x00000004
+#define HSA_MEM_FLAGS_RESERVED                 0xfffffff8
 
 struct kfd_mem_properties {
        struct list_head        list;
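The reserved masks above shrink by exactly the newly claimed bits: HSA_CAP_RESERVED drops bit 27 (0x08000000, now HSA_CAP_SVMAPI_SUPPORTED) and HSA_MEM_FLAGS_RESERVED drops bit 2 (0x00000004, now HSA_MEM_FLAGS_COHERENTHOSTACCESS). A small illustrative check of that accounting, using the values from this header (the static_assert lines are a sketch, not part of the kernel header):

#include <assert.h>	/* static_assert (C11) */

#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
#define HSA_CAP_RESERVED			0xf00f8000

#define HSA_MEM_FLAGS_COHERENTHOSTACCESS	0x00000004
#define HSA_MEM_FLAGS_RESERVED			0xfffffff8

/* A newly defined bit must no longer be part of the reserved mask. */
static_assert((HSA_CAP_SVMAPI_SUPPORTED & HSA_CAP_RESERVED) == 0,
	      "SVMAPI bit must be carved out of HSA_CAP_RESERVED");
static_assert((HSA_MEM_FLAGS_COHERENTHOSTACCESS & HSA_MEM_FLAGS_RESERVED) == 0,
	      "COHERENTHOSTACCESS bit must be carved out of HSA_MEM_FLAGS_RESERVED");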
index 9ca517b658546cecb4013a50c38c980deee0f62f..4823f8a321592cac4d53765a7e5d16d2a19deea3 100644 (file)
@@ -35,6 +35,7 @@
 #include "dc/inc/hw/abm.h"
 #include "dc/dc_dmub_srv.h"
 #include "dc/dc_edid_parser.h"
+#include "dc/dc_stat.h"
 #include "amdgpu_dm_trace.h"
 
 #include "vid.h"
@@ -59,6 +60,7 @@
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
+#include "i2caux_interface.h"
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/types.h>
@@ -618,6 +620,58 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params)
        amdgpu_dm_crtc_handle_crc_window_irq(&acrtc->base);
 }
 #endif
+
+#define DMUB_TRACE_MAX_READ 64
+
+/**
+ * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
+ * @interrupt_params: used for determining the Outbox instance
+ *
+ * Handles the low-priority Outbox interrupt: processes pending DMUB
+ * notifications and drains the DMCUB trace buffer.
+ */
+static void dm_dmub_outbox1_low_irq(void *interrupt_params)
+{
+       struct dmub_notification notify;
+       struct common_irq_params *irq_params = interrupt_params;
+       struct amdgpu_device *adev = irq_params->adev;
+       struct amdgpu_display_manager *dm = &adev->dm;
+       struct dmcub_trace_buf_entry entry = { 0 };
+       uint32_t count = 0;
+
+       if (dc_enable_dmub_notifications(adev->dm.dc)) {
+               if (irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
+                       do {
+                               dc_stat_get_dmub_notification(adev->dm.dc, &notify);
+                       } while (notify.pending_notification);
+
+                       if (adev->dm.dmub_notify)
+                               memcpy(adev->dm.dmub_notify, &notify, sizeof(struct dmub_notification));
+                       if (notify.type == DMUB_NOTIFICATION_AUX_REPLY)
+                               complete(&adev->dm.dmub_aux_transfer_done);
+                       // TODO : HPD Implementation
+
+               } else {
+                       DRM_ERROR("DM: Failed to receive correct outbox IRQ !");
+               }
+       }
+
+
+       do {
+               if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
+                       trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
+                                                       entry.param0, entry.param1);
+
+                       DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
+                                entry.trace_code, entry.tick_count, entry.param0, entry.param1);
+               } else
+                       break;
+
+               count++;
+
+       } while (count <= DMUB_TRACE_MAX_READ);
+
+       ASSERT(count <= DMUB_TRACE_MAX_READ);
+}
 #endif
 
 static int dm_set_clockgating_state(void *handle,
@@ -938,32 +992,6 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
 }
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-#define DMUB_TRACE_MAX_READ 64
-static void dm_dmub_trace_high_irq(void *interrupt_params)
-{
-       struct common_irq_params *irq_params = interrupt_params;
-       struct amdgpu_device *adev = irq_params->adev;
-       struct amdgpu_display_manager *dm = &adev->dm;
-       struct dmcub_trace_buf_entry entry = { 0 };
-       uint32_t count = 0;
-
-       do {
-               if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
-                       trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
-                                                       entry.param0, entry.param1);
-
-                       DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
-                                entry.trace_code, entry.tick_count, entry.param0, entry.param1);
-               } else
-                       break;
-
-               count++;
-
-       } while (count <= DMUB_TRACE_MAX_READ);
-
-       ASSERT(count <= DMUB_TRACE_MAX_READ);
-}
-
 static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config)
 {
        uint64_t pt_base;
@@ -1220,6 +1248,16 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
        adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
 #endif
+       if (dc_enable_dmub_notifications(adev->dm.dc)) {
+               init_completion(&adev->dm.dmub_aux_transfer_done);
+               adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
+               if (!adev->dm.dmub_notify) {
+                       DRM_INFO("amdgpu: failed to allocate adev->dm.dmub_notify");
+                       goto error;
+               }
+               amdgpu_dm_outbox_init(adev);
+       }
+
        if (amdgpu_dm_initialize_drm_device(adev)) {
                DRM_ERROR(
                "amdgpu: failed to initialize sw for display support.\n");
@@ -1293,6 +1331,11 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
                adev->dm.dc->ctx->dmub_srv = NULL;
        }
 
+       if (dc_enable_dmub_notifications(adev->dm.dc)) {
+               kfree(adev->dm.dmub_notify);
+               adev->dm.dmub_notify = NULL;
+       }
+
        if (adev->dm.dmub_bo)
                amdgpu_bo_free_kernel(&adev->dm.dmub_bo,
                                      &adev->dm.dmub_bo_gpu_addr,
@@ -2708,8 +2751,7 @@ static void handle_hpd_rx_irq(void *param)
         * conflict, after implement i2c helper, this mutex should be
         * retired.
         */
-       if (dc_link->type != dc_connection_mst_branch)
-               mutex_lock(&aconnector->hpd_lock);
+       mutex_lock(&aconnector->hpd_lock);
 
        read_hpd_rx_irq_data(dc_link, &hpd_irq_data);
 
@@ -2726,13 +2768,15 @@ static void handle_hpd_rx_irq(void *param)
                }
        }
 
-       mutex_lock(&adev->dm.dc_lock);
+       if (!amdgpu_in_reset(adev)) {
+               mutex_lock(&adev->dm.dc_lock);
 #ifdef CONFIG_DRM_AMD_DC_HDCP
        result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
 #else
        result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
 #endif
-       mutex_unlock(&adev->dm.dc_lock);
+               mutex_unlock(&adev->dm.dc_lock);
+       }
 
 out:
        if (result && !is_mst_root_connector) {
@@ -2776,10 +2820,10 @@ out:
        }
 #endif
 
-       if (dc_link->type != dc_connection_mst_branch) {
+       if (dc_link->type != dc_connection_mst_branch)
                drm_dp_cec_irq(&aconnector->dm_dp_aux.aux);
-               mutex_unlock(&aconnector->hpd_lock);
-       }
+
+       mutex_unlock(&aconnector->hpd_lock);
 }
 
 static void register_hpd_handlers(struct amdgpu_device *adev)
@@ -3151,38 +3195,51 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
 
        }
 
-       if (dc->ctx->dmub_srv) {
-               i = DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT;
-               r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->dmub_trace_irq);
+       /* HPD */
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
+                       &adev->hpd_irq);
+       if (r) {
+               DRM_ERROR("Failed to add hpd irq id!\n");
+               return r;
+       }
 
-               if (r) {
-                       DRM_ERROR("Failed to add dmub trace irq id!\n");
-                       return r;
-               }
+       register_hpd_handlers(adev);
 
-               int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
+       return 0;
+}
+/* Register Outbox IRQ sources and initialize IRQ callbacks */
+static int register_outbox_irq_handlers(struct amdgpu_device *adev)
+{
+       struct dc *dc = adev->dm.dc;
+       struct common_irq_params *c_irq_params;
+       struct dc_interrupt_params int_params = {0};
+       int r, i;
+
+       int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
+       int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
+                       &adev->dmub_outbox_irq);
+       if (r) {
+               DRM_ERROR("Failed to add outbox irq id!\n");
+               return r;
+       }
+
+       if (dc->ctx->dmub_srv) {
+               i = DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT;
+               int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
                int_params.irq_source =
-                       dc_interrupt_to_irq_source(dc, i, 0);
+               dc_interrupt_to_irq_source(dc, i, 0);
 
-               c_irq_params = &adev->dm.dmub_trace_params[0];
+               c_irq_params = &adev->dm.dmub_outbox_params[0];
 
                c_irq_params->adev = adev;
                c_irq_params->irq_src = int_params.irq_source;
 
                amdgpu_dm_irq_register_interrupt(adev, &int_params,
-                               dm_dmub_trace_high_irq, c_irq_params);
-       }
-
-       /* HPD */
-       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
-                       &adev->hpd_irq);
-       if (r) {
-               DRM_ERROR("Failed to add hpd irq id!\n");
-               return r;
+                               dm_dmub_outbox1_low_irq, c_irq_params);
        }
 
-       register_hpd_handlers(adev);
-
        return 0;
 }
 #endif
@@ -3414,22 +3471,37 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 {
        struct amdgpu_display_manager *dm = bl_get_data(bd);
        struct amdgpu_dm_backlight_caps caps;
-       struct dc_link *link = NULL;
+       struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP];
        u32 brightness;
        bool rc;
+       int i;
 
        amdgpu_dm_update_backlight_caps(dm);
        caps = dm->backlight_caps;
 
-       link = (struct dc_link *)dm->backlight_link;
+       for (i = 0; i < dm->num_of_edps; i++)
+               link[i] = (struct dc_link *)dm->backlight_link[i];
 
        brightness = convert_brightness_from_user(&caps, bd->props.brightness);
        // Change brightness based on AUX property
-       if (caps.aux_support)
-               rc = dc_link_set_backlight_level_nits(link, true, brightness,
-                                                     AUX_BL_DEFAULT_TRANSITION_TIME_MS);
-       else
-               rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
+       if (caps.aux_support) {
+               for (i = 0; i < dm->num_of_edps; i++) {
+                       rc = dc_link_set_backlight_level_nits(link[i], true, brightness,
+                               AUX_BL_DEFAULT_TRANSITION_TIME_MS);
+                       if (!rc) {
+                               DRM_ERROR("DM: Failed to update backlight via AUX on eDP[%d]\n", i);
+                               break;
+                       }
+               }
+       } else {
+               for (i = 0; i < dm->num_of_edps; i++) {
+                       rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness, 0);
+                       if (!rc) {
+                               DRM_ERROR("DM: Failed to update backlight on eDP[%d]\n", i);
+                               break;
+                       }
+               }
+       }
 
        return rc ? 0 : 1;
 }
@@ -3443,7 +3515,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
        caps = dm->backlight_caps;
 
        if (caps.aux_support) {
-               struct dc_link *link = (struct dc_link *)dm->backlight_link;
+               struct dc_link *link = (struct dc_link *)dm->backlight_link[0];
                u32 avg, peak;
                bool rc;
 
@@ -3452,7 +3524,7 @@ static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
                        return bd->props.brightness;
                return convert_brightness_to_user(&caps, avg);
        } else {
-               int ret = dc_link_get_backlight_level(dm->backlight_link);
+               int ret = dc_link_get_backlight_level(dm->backlight_link[0]);
 
                if (ret == DC_ERROR_UNEXPECTED)
                        return bd->props.brightness;
@@ -3549,10 +3621,13 @@ static void register_backlight_device(struct amdgpu_display_manager *dm,
                 * DM initialization because not having a backlight control
                 * is better then a black screen.
                 */
-               amdgpu_dm_register_backlight_device(dm);
+               if (!dm->backlight_dev)
+                       amdgpu_dm_register_backlight_device(dm);
 
-               if (dm->backlight_dev)
-                       dm->backlight_link = link;
+               if (dm->backlight_dev) {
+                       dm->backlight_link[dm->num_of_edps] = link;
+                       dm->num_of_edps++;
+               }
        }
 #endif
 }
@@ -3643,6 +3718,22 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                        goto fail;
                }
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       /* Use Outbox interrupt */
+       switch (adev->asic_type) {
+       case CHIP_SIENNA_CICHLID:
+       case CHIP_NAVY_FLOUNDER:
+       case CHIP_RENOIR:
+               if (register_outbox_irq_handlers(dm->adev)) {
+                       DRM_ERROR("DM: Failed to initialize IRQ\n");
+                       goto fail;
+               }
+               break;
+       default:
+               DRM_DEBUG_KMS("Unsupported ASIC type for outbox: 0x%X\n", adev->asic_type);
+       }
+#endif
+
        /* loops over all connectors on the board */
        for (i = 0; i < link_cnt; i++) {
                struct dc_link *link = NULL;
@@ -6560,13 +6651,13 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 {
        struct dc_stream_state *stream = NULL;
        struct drm_connector *connector;
-       struct drm_connector_state *new_con_state, *old_con_state;
+       struct drm_connector_state *new_con_state;
        struct amdgpu_dm_connector *aconnector;
        struct dm_connector_state *dm_conn_state;
        int i, j, clock, bpp;
        int vcpi, pbn_div, pbn = 0;
 
-       for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
+       for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
                aconnector = to_amdgpu_dm_connector(connector);
 
@@ -8164,15 +8255,14 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
 static void amdgpu_dm_commit_cursors(struct drm_atomic_state *state)
 {
        struct drm_plane *plane;
-       struct drm_plane_state *old_plane_state, *new_plane_state;
+       struct drm_plane_state *old_plane_state;
        int i;
 
        /*
         * TODO: Make this per-stream so we don't issue redundant updates for
         * commits with multiple streams.
         */
-       for_each_oldnew_plane_in_state(state, plane, old_plane_state,
-                                      new_plane_state, i)
+       for_each_old_plane_in_state(state, plane, old_plane_state, i)
                if (plane->type == DRM_PLANE_TYPE_CURSOR)
                        handle_cursor_update(plane, old_plane_state);
 }
@@ -10668,3 +10758,30 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
 
        return value;
 }
+
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
+                               struct aux_payload *payload, enum aux_return_code_type *operation_result)
+{
+       struct amdgpu_device *adev = ctx->driver_context;
+       int ret = 0;
+
+       dc_process_dmub_aux_transfer_async(ctx->dc, linkIndex, payload);
+       ret = wait_for_completion_interruptible_timeout(&adev->dm.dmub_aux_transfer_done, 10*HZ);
+       if (ret == 0) {
+               *operation_result = AUX_RET_ERROR_TIMEOUT;
+               return -1;
+       }
+       *operation_result = (enum aux_return_code_type)adev->dm.dmub_notify->result;
+
+       if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
+               (*payload->reply) = adev->dm.dmub_notify->aux_reply.command;
+
+               // For read case, Copy data to payload
+               if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
+               (*payload->reply == AUX_TRANSACTION_REPLY_AUX_ACK))
+                       memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
+                       adev->dm.dmub_notify->aux_reply.length);
+       }
+
+       return adev->dm.dmub_notify->aux_reply.length;
+}
index b2f2ccfc20bbe62cc57a30cbf03f2ead5ea6e29c..c6f79c7dfac404505469ad6022fc3051f4a35fa2 100644 (file)
@@ -46,6 +46,7 @@
 
 #define AMDGPU_DM_MAX_CRTC 6
 
+#define AMDGPU_DM_MAX_NUM_EDP 2
 /*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
@@ -54,6 +55,8 @@
 #include "irq_types.h"
 #include "signal_types.h"
 #include "amdgpu_dm_crc.h"
+struct aux_payload;
+enum aux_return_code_type;
 
 /* Forward declarations */
 struct amdgpu_device;
@@ -62,6 +65,7 @@ struct dc;
 struct amdgpu_bo;
 struct dmub_srv;
 struct dc_plane_state;
+struct dmub_notification;
 
 struct common_irq_params {
        struct amdgpu_device *adev;
@@ -135,6 +139,10 @@ struct amdgpu_dm_backlight_caps {
 
 /**
  * struct dal_allocation - Tracks mapped FB memory for SMU communication
+ * @list: list of dal allocations
+ * @bo: GPU buffer object
+ * @cpu_ptr: CPU virtual address of the GPU buffer object
+ * @gpu_addr: GPU virtual address of the GPU buffer object
  */
 struct dal_allocation {
        struct list_head list;
@@ -164,6 +172,7 @@ struct dal_allocation {
  * @compressor: Frame buffer compression buffer. See &struct dm_compressor_info
  * @force_timing_sync: set via debugfs. When set, indicates that all connected
  *                    displays will be forced to synchronize.
+ * @dmcub_trace_event_en: enable dmcub trace events
  */
 struct amdgpu_display_manager {
 
@@ -178,6 +187,8 @@ struct amdgpu_display_manager {
         */
        struct dmub_srv *dmub_srv;
 
+       struct dmub_notification *dmub_notify;
+
        /**
         * @dmub_fb_info:
         *
@@ -349,11 +360,17 @@ struct amdgpu_display_manager {
        struct common_irq_params
        dmub_trace_params[1];
 
+       struct common_irq_params
+       dmub_outbox_params[1];
+
        spinlock_t irq_handler_list_table_lock;
 
        struct backlight_device *backlight_dev;
 
-       const struct dc_link *backlight_link;
+       const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP];
+
+       uint8_t num_of_edps;
+
        struct amdgpu_dm_backlight_caps backlight_caps;
 
        struct mod_freesync *freesync_module;
@@ -418,6 +435,7 @@ struct amdgpu_display_manager {
         * DAL fb memory allocation list, for communication with SMU.
         */
        struct list_head da_list;
+       struct completion dmub_aux_transfer_done;
 };
 
 enum dsc_clock_force_state {
@@ -600,4 +618,6 @@ void amdgpu_dm_update_connector_after_detect(
 
 extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
 
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int linkIndex,
+                                       struct aux_payload *payload, enum aux_return_code_type *operation_result);
 #endif /* __AMDGPU_DM_H__ */
index 1b6b15708b96afd8202255fbe958d63a2ffe5ada..9fbbd0159119d9fe9aabd8c05b0fad97ff28ee74 100644 (file)
@@ -925,6 +925,22 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data)
        return 0;
 }
 #endif
+
+/*
+ * Returns whether the connected display is internal and not hotpluggable.
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/internal_display
+ */
+static int internal_display_show(struct seq_file *m, void *data)
+{
+       struct drm_connector *connector = m->private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+       struct dc_link *link = aconnector->dc_link;
+
+       seq_printf(m, "Internal: %u\n", link->is_internal_display);
+
+       return 0;
+}
+
 /* function description
  *
  * generic SDP message access for testing
@@ -2361,6 +2377,44 @@ unlock:
        return size;
 }
 
+/*
+ * Backlight at this moment.  Read only.
+ * As written to display, taking ABM and backlight lut into account.
+ * Ranges from 0x0 to 0x10000 (= 100% PWM)
+ *
+ * Example usage: cat /sys/kernel/debug/dri/0/eDP-1/current_backlight
+ */
+static int current_backlight_show(struct seq_file *m, void *unused)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
+       struct dc_link *link = aconnector->dc_link;
+       unsigned int backlight;
+
+       backlight = dc_link_get_backlight_level(link);
+       seq_printf(m, "0x%x\n", backlight);
+
+       return 0;
+}
+
+/*
+ * Backlight value that is being approached.  Read only.
+ * As written to display, taking ABM and backlight lut into account.
+ * Ranges from 0x0 to 0x10000 (= 100% PWM)
+ *
+ * Example usage: cat /sys/kernel/debug/dri/0/eDP-1/target_backlight
+ */
+static int target_backlight_show(struct seq_file *m, void *unused)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(m->private);
+       struct dc_link *link = aconnector->dc_link;
+       unsigned int backlight;
+
+       backlight = dc_link_get_target_backlight_pwm(link);
+       seq_printf(m, "0x%x\n", backlight);
+
+       return 0;
+}
+
 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
 DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2369,6 +2423,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
 #ifdef CONFIG_DRM_AMD_DC_HDCP
 DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
 #endif
+DEFINE_SHOW_ATTRIBUTE(internal_display);
 
 static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
        .owner = THIS_MODULE,
@@ -2594,13 +2649,17 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g
 
 DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
 
+DEFINE_SHOW_ATTRIBUTE(current_backlight);
+DEFINE_SHOW_ATTRIBUTE(target_backlight);
+
 static const struct {
        char *name;
        const struct file_operations *fops;
 } connector_debugfs_entries[] = {
                {"force_yuv420_output", &force_yuv420_output_fops},
                {"output_bpc", &output_bpc_fops},
-               {"trigger_hotplug", &trigger_hotplug_debugfs_fops}
+               {"trigger_hotplug", &trigger_hotplug_debugfs_fops},
+               {"internal_display", &internal_display_fops}
 };
 
 void connector_debugfs_init(struct amdgpu_dm_connector *connector)
@@ -2616,8 +2675,13 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
                                            dp_debugfs_entries[i].fops);
                }
        }
-       if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
+       if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
                debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
+               debugfs_create_file("amdgpu_current_backlight_pwm", 0444, dir, connector,
+                                   &current_backlight_fops);
+               debugfs_create_file("amdgpu_target_backlight_pwm", 0444, dir, connector,
+                                   &target_backlight_fops);
+       }
 
        for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
                debugfs_create_file(connector_debugfs_entries[i].name,
@@ -2920,38 +2984,6 @@ static ssize_t dtn_log_write(
        return size;
 }
 
-/*
- * Backlight at this moment.  Read only.
- * As written to display, taking ABM and backlight lut into account.
- * Ranges from 0x0 to 0x10000 (= 100% PWM)
- */
-static int current_backlight_show(struct seq_file *m, void *unused)
-{
-       struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
-       struct amdgpu_display_manager *dm = &adev->dm;
-
-       unsigned int backlight = dc_link_get_backlight_level(dm->backlight_link);
-
-       seq_printf(m, "0x%x\n", backlight);
-       return 0;
-}
-
-/*
- * Backlight value that is being approached.  Read only.
- * As written to display, taking ABM and backlight lut into account.
- * Ranges from 0x0 to 0x10000 (= 100% PWM)
- */
-static int target_backlight_show(struct seq_file *m, void *unused)
-{
-       struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
-       struct amdgpu_display_manager *dm = &adev->dm;
-
-       unsigned int backlight = dc_link_get_target_backlight_pwm(dm->backlight_link);
-
-       seq_printf(m, "0x%x\n", backlight);
-       return 0;
-}
-
 static int mst_topo_show(struct seq_file *m, void *unused)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
@@ -3134,8 +3166,6 @@ static int visual_confirm_get(void *data, u64 *val)
        return 0;
 }
 
-DEFINE_SHOW_ATTRIBUTE(current_backlight);
-DEFINE_SHOW_ATTRIBUTE(target_backlight);
 DEFINE_SHOW_ATTRIBUTE(mst_topo);
 DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get,
                         visual_confirm_set, "%llu\n");
@@ -3215,10 +3245,6 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
        struct drm_minor *minor = adev_to_drm(adev)->primary;
        struct dentry *root = minor->debugfs_root;
 
-       debugfs_create_file("amdgpu_current_backlight_pwm", 0444,
-                           root, adev, &current_backlight_fops);
-       debugfs_create_file("amdgpu_target_backlight_pwm", 0444,
-                           root, adev, &target_backlight_fops);
        debugfs_create_file("amdgpu_mst_topology", 0444, root,
                            adev, &mst_topo_fops);
        debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,
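
A minimal userspace sketch, not part of the patch, showing how the per-connector backlight entries registered above can be read; it assumes debugfs is mounted at /sys/kernel/debug and that the panel's connector directory is named eDP-1.

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; adjust the card index and connector name. */
	const char *path =
		"/sys/kernel/debug/dri/0/eDP-1/amdgpu_current_backlight_pwm";
	FILE *f = fopen(path, "r");
	unsigned int backlight;

	if (!f || fscanf(f, "%x", &backlight) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);

	/* 0x10000 corresponds to 100% PWM, as documented above. */
	printf("backlight: 0x%x (%.1f%% PWM)\n",
	       backlight, backlight * 100.0 / 0x10000);
	return 0;
}
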
index e8b325a828c1f3a28c8b285f6b75994da70033b3..4646b0d0293994e8ebd5d50bf4c1f134be422cde 100644 (file)
@@ -544,8 +544,10 @@ bool dm_helpers_dp_write_dsc_enable(
                ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1);
        }
 
-       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
-               return dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
+       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
+               ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
+               DC_LOG_DC("Send DSC %s to SST display\n", enable_dsc ? "enable" : "disable");
+       }
 
        return (ret > 0);
 }
@@ -640,7 +642,14 @@ enum dc_edid_status dm_helpers_read_local_edid(
 
        return edid_status;
 }
-
+int dm_helper_dmub_aux_transfer_sync(
+               struct dc_context *ctx,
+               const struct dc_link *link,
+               struct aux_payload *payload,
+               enum aux_return_code_type *operation_result)
+{
+       return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, operation_result);
+}
 void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
 {
        /* TODO: something */
@@ -698,12 +707,12 @@ void dm_helpers_free_gpu_mem(
        }
 }
 
-bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enable)
+bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
 {
        enum dc_irq_source irq_source;
        bool ret;
 
-       irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX0;
+       irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
 
        ret = dc_interrupt_set(ctx->dc, irq_source, enable);
 
index b3ed7e777720435f68217477c75729ec0487d6fa..40f617bbb86f4be2d5cea49ee96ead6dd796c5f5 100644 (file)
@@ -769,6 +769,18 @@ static int amdgpu_dm_set_vline0_irq_state(struct amdgpu_device *adev,
                __func__);
 }
 
+static int amdgpu_dm_set_dmub_outbox_irq_state(struct amdgpu_device *adev,
+                                       struct amdgpu_irq_src *source,
+                                       unsigned int crtc_id,
+                                       enum amdgpu_interrupt_state state)
+{
+       enum dc_irq_source irq_source = DC_IRQ_SOURCE_DMCUB_OUTBOX;
+       bool st = (state == AMDGPU_IRQ_STATE_ENABLE);
+
+       dc_interrupt_set(adev->dm.dc, irq_source, st);
+       return 0;
+}
+
 static int amdgpu_dm_set_vupdate_irq_state(struct amdgpu_device *adev,
                                           struct amdgpu_irq_src *source,
                                           unsigned int crtc_id,
@@ -805,6 +817,11 @@ static const struct amdgpu_irq_src_funcs dm_vline0_irq_funcs = {
        .process = amdgpu_dm_irq_handler,
 };
 
+static const struct amdgpu_irq_src_funcs dm_dmub_outbox_irq_funcs = {
+       .set = amdgpu_dm_set_dmub_outbox_irq_state,
+       .process = amdgpu_dm_irq_handler,
+};
+
 static const struct amdgpu_irq_src_funcs dm_vupdate_irq_funcs = {
        .set = amdgpu_dm_set_vupdate_irq_state,
        .process = amdgpu_dm_irq_handler,
@@ -827,13 +844,15 @@ static const struct amdgpu_irq_src_funcs dm_hpd_irq_funcs = {
 
 void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
 {
-
        adev->crtc_irq.num_types = adev->mode_info.num_crtc;
        adev->crtc_irq.funcs = &dm_crtc_irq_funcs;
 
        adev->vline0_irq.num_types = adev->mode_info.num_crtc;
        adev->vline0_irq.funcs = &dm_vline0_irq_funcs;
 
+       adev->dmub_outbox_irq.num_types = 1;
+       adev->dmub_outbox_irq.funcs = &dm_dmub_outbox_irq_funcs;
+
        adev->vupdate_irq.num_types = adev->mode_info.num_crtc;
        adev->vupdate_irq.funcs = &dm_vupdate_irq_funcs;
 
@@ -846,6 +865,12 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev)
        adev->hpd_irq.num_types = adev->mode_info.num_hpd;
        adev->hpd_irq.funcs = &dm_hpd_irq_funcs;
 }
+void amdgpu_dm_outbox_init(struct amdgpu_device *adev)
+{
+       dc_interrupt_set(adev->dm.dc,
+               DC_IRQ_SOURCE_DMCUB_OUTBOX,
+               true);
+}
 
 /**
  * amdgpu_dm_hpd_init - hpd setup callback.
index 82f8e761becaeb3c883e238ac13f677b41fa4301..2349238a626b133a2da7a21229fb1418b107de7b 100644 (file)
@@ -82,6 +82,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev,
 
 void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev);
 
+void amdgpu_dm_outbox_init(struct amdgpu_device *adev);
 void amdgpu_dm_hpd_init(struct amdgpu_device *adev);
 void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);
 
index 4a0c24ce5f7d8fde678f1b61fb67a93c873f28f1..e6b2eec9fb59ac5cdbc0b5392404ad17b73399bd 100644 (file)
@@ -278,6 +278,9 @@ dm_dp_mst_detect(struct drm_connector *connector,
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
        struct amdgpu_dm_connector *master = aconnector->mst_port;
 
+       if (drm_connector_is_unregistered(connector))
+               return connector_status_disconnected;
+
        return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr,
                                      aconnector->port);
 }
index f33847299bca2a63b077fa4b8c7cc95059ce54f8..5cd89f7e7772c17c7a4e5ae336debd1d5bd4c408 100644 (file)
@@ -54,7 +54,7 @@ AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LI
 
 include $(AMD_DC)
 
-DISPLAY_CORE = dc.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
+DISPLAY_CORE = dc.o dc_stat.o dc_link.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
 dc_surface.o dc_link_hwss.o dc_link_dp.o dc_link_ddc.o dc_debug.o dc_stream.o \
 dc_link_enc_cfg.o
 
index d79f4fe06c47ed2fb1b74742cc1b564ed1c661c4..49126a0f66af0d3c76cda579144b8301e8d5735d 100644 (file)
@@ -836,8 +836,10 @@ static enum bp_result bios_parser_get_spread_spectrum_info(
                        return get_ss_info_v4_1(bp, signal, index, ss_info);
                case 2:
                case 3:
+               case 4:
                        return get_ss_info_v4_2(bp, signal, index, ss_info);
                default:
+                       ASSERT(0);
                        break;
                }
                break;
index a06e86853bb9662ba9a0a52b45e1c8f02c913915..f965914ea57c7cad6c266fb5bf733d809dc262ca 100644 (file)
@@ -106,10 +106,10 @@ static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
        for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
                int dpp_inst, dppclk_khz, prev_dppclk_khz;
 
-               /* Loop index will match dpp->inst if resource exists,
-                * and we want to avoid dependency on dpp object
+               /* Loop index may not match dpp->inst if some pipes are disabled,
+                * so select the correct inst from res_pool
                 */
-               dpp_inst = i;
+               dpp_inst = clk_mgr->base.ctx->dc->res_pool->dpps[i]->inst;
                dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
 
                prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
@@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
        struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
        struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
        struct dc *dc = clk_mgr_base->ctx->dc;
-       int display_count, i;
+       int display_count;
        bool update_dppclk = false;
        bool update_dispclk = false;
        bool dpp_clock_lowered = false;
@@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
                                clk_mgr_base->clks.dppclk_khz,
                                safe_to_lower);
 
-               for (i = 0; i < context->stream_count; i++) {
-                       if (context->streams[i]->signal == SIGNAL_TYPE_EDP &&
-                               context->streams[i]->apply_seamless_boot_optimization) {
-                               dc_wait_for_vblank(dc, context->streams[i]);
-                               break;
-                       }
-               }
-
                clk_mgr_base->clks.actual_dppclk_khz =
                                rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
 
@@ -769,43 +761,6 @@ static struct wm_table ddr4_wm_table_rn = {
        }
 };
 
-static struct wm_table ddr4_1R_wm_table_rn = {
-       .entries = {
-               {
-                       .wm_inst = WM_A,
-                       .wm_type = WM_TYPE_PSTATE_CHG,
-                       .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 13.90,
-                       .sr_enter_plus_exit_time_us = 14.80,
-                       .valid = true,
-               },
-               {
-                       .wm_inst = WM_B,
-                       .wm_type = WM_TYPE_PSTATE_CHG,
-                       .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 13.90,
-                       .sr_enter_plus_exit_time_us = 14.80,
-                       .valid = true,
-               },
-               {
-                       .wm_inst = WM_C,
-                       .wm_type = WM_TYPE_PSTATE_CHG,
-                       .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 13.90,
-                       .sr_enter_plus_exit_time_us = 14.80,
-                       .valid = true,
-               },
-               {
-                       .wm_inst = WM_D,
-                       .wm_type = WM_TYPE_PSTATE_CHG,
-                       .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 13.90,
-                       .sr_enter_plus_exit_time_us = 14.80,
-                       .valid = true,
-               },
-       }
-};
-
 static struct wm_table lpddr4_wm_table_rn = {
        .entries = {
                {
@@ -842,46 +797,67 @@ static struct wm_table lpddr4_wm_table_rn = {
                },
        }
 };
-static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
+
+static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table,
+               unsigned int voltage)
 {
        int i;
+       uint32_t max_clk = 0;
 
-       for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) {
-               if (clock_table->SocClocks[i].Vol == voltage)
-                       return clock_table->SocClocks[i].Freq;
+       for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) {
+               if (clock_table->FClocks[i].Vol <= voltage) {
+                       max_clk = clock_table->FClocks[i].Freq > max_clk ?
+                               clock_table->FClocks[i].Freq : max_clk;
+               }
+       }
+
+       return max_clk;
+}
+
+static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table,
+               unsigned int voltage)
+{
+       int i;
+       uint32_t max_clk = 0;
+
+       for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) {
+               if (clock_table->MemClocks[i].Vol <= voltage) {
+                       max_clk = clock_table->MemClocks[i].Freq > max_clk ?
+                               clock_table->MemClocks[i].Freq : max_clk;
+               }
        }
 
-       ASSERT(0);
-       return 0;
+       return max_clk;
 }
-static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
+
+static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table,
+               unsigned int voltage)
 {
        int i;
+       uint32_t max_clk = 0;
 
-       for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) {
-               if (clock_table->DcfClocks[i].Vol == voltage)
-                       return clock_table->DcfClocks[i].Freq;
+       for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) {
+               if (clock_table->SocClocks[i].Vol <= voltage) {
+                       max_clk = clock_table->SocClocks[i].Freq > max_clk ?
+                               clock_table->SocClocks[i].Freq : max_clk;
+               }
        }
 
-       ASSERT(0);
-       return 0;
+       return max_clk;
 }
 
 static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
 {
        int i, j = 0;
+       unsigned int volt;
 
        j = -1;
 
-       ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
-
-       /* Find lowest DPM, FCLK is filled in reverse order*/
-
-       for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) {
-               if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) {
+       /* Find max DPM */
+       for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) {
+               if (clock_table->DcfClocks[i].Freq != 0 &&
+                               clock_table->DcfClocks[i].Vol != 0)
                        j = i;
-                       break;
-               }
        }
 
        if (j == -1) {
@@ -892,13 +868,18 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
 
        bw_params->clk_table.num_entries = j + 1;
 
-       for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
-               bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq;
-               bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq;
-               bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol;
-               bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
-               bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table,
-                                                                       bw_params->clk_table.entries[i].voltage);
+       for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+               volt = clock_table->DcfClocks[i].Vol;
+
+               bw_params->clk_table.entries[i].voltage = volt;
+               bw_params->clk_table.entries[i].dcfclk_mhz =
+                       clock_table->DcfClocks[i].Freq;
+               bw_params->clk_table.entries[i].fclk_mhz =
+                       find_max_fclk_for_voltage(clock_table, volt);
+               bw_params->clk_table.entries[i].memclk_mhz =
+                       find_max_memclk_for_voltage(clock_table, volt);
+               bw_params->clk_table.entries[i].socclk_mhz =
+                       find_max_socclk_for_voltage(clock_table, volt);
        }
 
        bw_params->vram_type = bios_info->memory_type;
@@ -990,12 +971,8 @@ void rn_clk_mgr_construct(
                } else {
                        if (is_green_sardine)
                                rn_bw_params.wm_table = ddr4_wm_table_gs;
-                       else {
-                               if (ctx->dc->config.is_single_rank_dimm)
-                                       rn_bw_params.wm_table = ddr4_1R_wm_table_rn;
-                               else
-                                       rn_bw_params.wm_table = ddr4_wm_table_rn;
-                       }
+                       else
+                               rn_bw_params.wm_table = ddr4_wm_table_rn;
                }
                /* Saved clocks configured at boot for debug purposes */
                rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
@@ -1013,9 +990,6 @@ void rn_clk_mgr_construct(
                if (status == PP_SMU_RESULT_OK &&
                    ctx->dc_bios && ctx->dc_bios->integrated_info) {
                        rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
-                       /* treat memory config as single channel if memory is asymmetrics. */
-                       if (ctx->dc->config.is_asymmetric_memory)
-                               clk_mgr->base.bw_params->num_channels = 1;
                }
        }
 
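
A standalone sketch, with simplified types rather than the driver's dpm_clocks layout, of the selection rule implemented by the new find_max_*_for_voltage() helpers above: among all DPM levels whose voltage is at or below the requested voltage, take the highest frequency.

#include <stdint.h>
#include <stdio.h>

struct dpm_level {
	uint32_t freq_mhz;
	uint32_t voltage;
};

static uint32_t max_freq_for_voltage(const struct dpm_level *levels,
				     int num_levels, uint32_t voltage)
{
	uint32_t max_freq = 0;
	int i;

	for (i = 0; i < num_levels; i++) {
		if (levels[i].voltage <= voltage && levels[i].freq_mhz > max_freq)
			max_freq = levels[i].freq_mhz;
	}

	return max_freq;
}

int main(void)
{
	const struct dpm_level fclk[] = {
		{ 400, 1 }, { 800, 2 }, { 1200, 3 }, { 1600, 4 },
	};

	/* Voltage level 3 allows at most the 1200 MHz entry. */
	printf("%u MHz\n", (unsigned)max_freq_for_voltage(fclk, 4, 3));
	return 0;
}
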
index 4713f09bcbf18f8a39825eb016bdc058048a587a..d6f11dbf1bd42b632eff344e3d5f68512e89c10d 100644 (file)
@@ -59,7 +59,6 @@
 #include "dc_link_ddc.h"
 #include "dm_helpers.h"
 #include "mem_input.h"
-#include "hubp.h"
 
 #include "dc_link_dp.h"
 #include "dc_dmub_srv.h"
@@ -3219,19 +3218,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
        }
 }
 
-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream)
-{
-       int i;
-
-       for (i = 0; i < dc->res_pool->pipe_count; i++)
-               if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
-                       struct timing_generator *tg =
-                               dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
-                       tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK);
-                       break;
-               }
-}
-
 void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
 {
        info->displayClock                              = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
@@ -3287,7 +3273,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
        if (dc->debug.disable_idle_power_optimizations)
                return;
 
-       if (dc->clk_mgr->funcs->is_smu_present)
+       if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
                if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr))
                        return;
 
index f4374d83662aeb7432a2c8e9ade1ef3644a5094b..81beff4e94348c22440c1cb4a623eb127af1c41d 100644 (file)
@@ -48,6 +48,7 @@
 #include "dce/dmub_psr.h"
 #include "dmub/dmub_srv.h"
 #include "inc/hw/panel_cntl.h"
+#include "inc/link_enc_cfg.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -247,6 +248,16 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
                link->dc->hwss.edp_wait_for_hpd_ready(link, true);
        }
 
+       /* Link may not have physical HPD pin. */
+       if (link->ep_type != DISPLAY_ENDPOINT_PHY) {
+               if (link->hpd_status)
+                       *type = dc_connection_single;
+               else
+                       *type = dc_connection_none;
+
+               return true;
+       }
+
        /* todo: may need to lock gpio access */
        hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id,
                               link->ctx->gpio_service);
@@ -432,8 +443,18 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
 static enum signal_type link_detect_sink(struct dc_link *link,
                                         enum dc_detect_reason reason)
 {
-       enum signal_type result = get_basic_signal_type(link->link_enc->id,
-                                                       link->link_id);
+       enum signal_type result;
+       struct graphics_object_id enc_id;
+
+       if (link->is_dig_mapping_flexible)
+               enc_id = (struct graphics_object_id){.id = ENCODER_ID_UNKNOWN};
+       else
+               enc_id = link->link_enc->id;
+       result = get_basic_signal_type(enc_id, link->link_id);
+
+       /* Use basic signal type for link without physical connector. */
+       if (link->ep_type != DISPLAY_ENDPOINT_PHY)
+               return result;
 
        /* Internal digital encoder will detect only dongles
         * that require digital signal
@@ -762,19 +783,20 @@ static bool detect_dp(struct dc_link *link,
                }
 
                if (link->type != dc_connection_mst_branch &&
-                   is_dp_active_dongle(link)) {
-                       /* DP active dongles */
-                       link->type = dc_connection_active_dongle;
+                   is_dp_branch_device(link)) {
+                       /* DP SST branch */
+                       link->type = dc_connection_sst_branch;
                        if (!link->dpcd_caps.sink_count.bits.SINK_COUNT) {
                                /*
-                                * active dongle unplug processing for short irq
+                                * SST branch unplug processing for short irq
                                 */
                                link_disconnect_sink(link);
                                return true;
                        }
 
-                       if (link->dpcd_caps.dongle_type !=
-                           DISPLAY_DONGLE_DP_HDMI_CONVERTER)
+                       if (is_dp_active_dongle(link) &&
+                               (link->dpcd_caps.dongle_type !=
+                                       DISPLAY_DONGLE_DP_HDMI_CONVERTER))
                                *converter_disable_audio = true;
                }
        } else {
@@ -954,7 +976,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
 
                case SIGNAL_TYPE_DISPLAY_PORT: {
                        /* wa HPD high coming too early*/
-                       if (link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
+                       if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
+                           link->link_enc->features.flags.bits.DP_IS_USB_C == 1) {
                                /* if alt mode times out, return false */
                                if (!wait_for_entering_dp_alt_mode(link))
                                        return false;
@@ -974,8 +997,8 @@ static bool dc_link_detect_helper(struct dc_link *link,
                                           sizeof(struct dpcd_caps)))
                                        same_dpcd = false;
                        }
-                       /* Active dongle downstream unplug*/
-                       if (link->type == dc_connection_active_dongle &&
+                       /* Active SST downstream branch device unplug*/
+                       if (link->type == dc_connection_sst_branch &&
                            link->dpcd_caps.sink_count.bits.SINK_COUNT == 0) {
                                if (prev_sink)
                                        /* Downstream unplug */
@@ -1206,14 +1229,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 {
        const struct dc *dc = link->dc;
        bool ret;
+       bool can_apply_seamless_boot = false;
+       int i;
+
+       for (i = 0; i < dc->current_state->stream_count; i++) {
+               if (dc->current_state->streams[i]->apply_seamless_boot_optimization) {
+                       can_apply_seamless_boot = true;
+                       break;
+               }
+       }
 
        /* get out of low power state */
-       clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
+       if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
+               clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
 
        ret = dc_link_detect_helper(link, reason);
 
        /* Go back to power optimized state */
-       clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
+       if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT)
+               clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
 
        return ret;
 }
@@ -1716,6 +1750,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
        bool apply_seamless_boot_optimization = false;
        uint32_t bl_oled_enable_delay = 50; // in ms
        const uint32_t post_oui_delay = 30; // 30ms
+       /* Reduce link bandwidth between failed link training attempts. */
+       bool do_fallback = false;
 
        // check for seamless boot
        for (i = 0; i < state->stream_count; i++) {
@@ -1754,7 +1790,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
                                               skip_video_pattern,
                                               LINK_TRAINING_ATTEMPTS,
                                               pipe_ctx,
-                                              pipe_ctx->stream->signal)) {
+                                              pipe_ctx->stream->signal,
+                                              do_fallback)) {
                link->cur_link_settings = link_settings;
                status = DC_OK;
        } else {
@@ -3475,9 +3512,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
        uint32_t kbps;
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-       if (timing->flags.DSC) {
-               return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel);
-       }
+       if (timing->flags.DSC)
+               return dc_dsc_stream_bandwidth_in_kbps(timing,
+                               timing->dsc_cfg.bits_per_pixel,
+                               timing->dsc_cfg.num_slices_h,
+                               timing->dsc_cfg.is_dp);
 #endif
 
        switch (timing->display_color_depth) {
@@ -3539,19 +3578,6 @@ void dc_link_set_drive_settings(struct dc *dc,
        dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
 }
 
-void dc_link_perform_link_training(struct dc *dc,
-                                  struct dc_link_settings *link_setting,
-                                  bool skip_video_pattern)
-{
-       int i;
-
-       for (i = 0; i < dc->link_count; i++)
-               dc_link_dp_perform_link_training(
-                       dc->links[i],
-                       link_setting,
-                       skip_video_pattern);
-}
-
 void dc_link_set_preferred_link_settings(struct dc *dc,
                                         struct dc_link_settings *link_setting,
                                         struct dc_link *link)
@@ -3702,8 +3728,22 @@ void dc_link_overwrite_extended_receiver_cap(
 
 bool dc_link_is_fec_supported(const struct dc_link *link)
 {
+       struct link_encoder *link_enc = NULL;
+
+       /* Links supporting dynamically assigned link encoders will be assigned the
+        * next available encoder if one is not already assigned.
+        */
+       if (link->is_dig_mapping_flexible &&
+                       link->dc->res_pool->funcs->link_encs_assign) {
+               link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
+               if (link_enc == NULL)
+                       link_enc = link_enc_cfg_get_next_avail_link_enc(link->dc, link->dc->current_state);
+       } else
+               link_enc = link->link_enc;
+       ASSERT(link_enc);
+
        return (dc_is_dp_signal(link->connector_signal) &&
-                       link->link_enc->features.fec_supported &&
+                       link_enc->features.fec_supported &&
                        link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
                        !IS_FPGA_MAXIMUS_DC(link->ctx->dce_environment));
 }
index 64414c51312dec3638f74f41bb8248e62e73f8c4..ba6b56f2026907438494d9eec23d4f164f1776aa 100644 (file)
@@ -658,7 +658,10 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
                struct aux_payload *payload,
                enum aux_return_code_type *operation_result)
 {
-       return dce_aux_transfer_raw(ddc, payload, operation_result);
+       if (dc_enable_dmub_notifications(ddc->ctx->dc))
+               return dce_aux_transfer_dmub_raw(ddc, payload, operation_result);
+       else
+               return dce_aux_transfer_raw(ddc, payload, operation_result);
 }
 
 /* dc_link_aux_transfer_with_retries() - Attempt to submit an
@@ -682,6 +685,10 @@ bool dc_link_aux_try_to_configure_timeout(struct ddc_service *ddc,
        bool result = false;
        struct ddc *ddc_pin = ddc->ddc_pin;
 
+       /* Do not try to access nonexistent DDC pin. */
+       if (ddc->link->ep_type != DISPLAY_ENDPOINT_PHY)
+               return true;
+
        if (ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout) {
                ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]->funcs->configure_timeout(ddc, timeout);
                result = true;
index afa43181dec6870f64af7651bc1281fe7d06eb88..ba4883fca616c95e68296ed06db101103dbb1417 100644 (file)
@@ -14,6 +14,7 @@
 #include "dpcd_defs.h"
 #include "dc_dmub_srv.h"
 #include "dce/dmub_hw_lock_mgr.h"
+#include "inc/link_enc_cfg.h"
 
 /*Travis*/
 static const uint8_t DP_VGA_LVDS_CONVERTER_ID_2[] = "sivarT";
@@ -107,10 +108,50 @@ static void wait_for_training_aux_rd_interval(
                wait_in_micro_secs);
 }
 
+static enum dpcd_training_patterns
+       dc_dp_training_pattern_to_dpcd_training_pattern(
+       struct dc_link *link,
+       enum dc_dp_training_pattern pattern)
+{
+       enum dpcd_training_patterns dpcd_tr_pattern =
+       DPCD_TRAINING_PATTERN_VIDEOIDLE;
+
+       switch (pattern) {
+       case DP_TRAINING_PATTERN_SEQUENCE_1:
+               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
+               break;
+       case DP_TRAINING_PATTERN_SEQUENCE_2:
+               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
+               break;
+       case DP_TRAINING_PATTERN_SEQUENCE_3:
+               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
+               break;
+       case DP_TRAINING_PATTERN_SEQUENCE_4:
+               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
+               break;
+       case DP_TRAINING_PATTERN_VIDEOIDLE:
+               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_VIDEOIDLE;
+               break;
+       default:
+               ASSERT(0);
+               DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
+                       __func__, pattern);
+               break;
+       }
+
+       return dpcd_tr_pattern;
+}
+
 static void dpcd_set_training_pattern(
        struct dc_link *link,
-       union dpcd_training_pattern dpcd_pattern)
+       enum dc_dp_training_pattern training_pattern)
 {
+       union dpcd_training_pattern dpcd_pattern = { {0} };
+
+       dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
+                       dc_dp_training_pattern_to_dpcd_training_pattern(
+                                       link, training_pattern);
+
        core_link_write_dpcd(
                link,
                DP_TRAINING_PATTERN_SET,
@@ -132,10 +173,22 @@ static enum dc_dp_training_pattern decide_cr_training_pattern(
 static enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
                const struct dc_link_settings *link_settings)
 {
+       struct link_encoder *link_enc;
        enum dc_dp_training_pattern highest_tp = DP_TRAINING_PATTERN_SEQUENCE_2;
-       struct encoder_feature_support *features = &link->link_enc->features;
+       struct encoder_feature_support *features;
        struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
 
+       /* Access link encoder capability based on whether it is statically
+        * or dynamically assigned to a link.
+        */
+       if (link->is_dig_mapping_flexible &&
+                       link->dc->res_pool->funcs->link_encs_assign)
+               link_enc = link_enc_cfg_get_link_enc_used_by_link(link->dc->current_state, link);
+       else
+               link_enc = link->link_enc;
+       ASSERT(link_enc);
+       features = &link_enc->features;
+
        if (features->flags.bits.IS_TPS3_CAPABLE)
                highest_tp = DP_TRAINING_PATTERN_SEQUENCE_3;
 
@@ -227,37 +280,6 @@ static void dpcd_set_link_settings(
        }
 }
 
-static enum dpcd_training_patterns
-       dc_dp_training_pattern_to_dpcd_training_pattern(
-       struct dc_link *link,
-       enum dc_dp_training_pattern pattern)
-{
-       enum dpcd_training_patterns dpcd_tr_pattern =
-       DPCD_TRAINING_PATTERN_VIDEOIDLE;
-
-       switch (pattern) {
-       case DP_TRAINING_PATTERN_SEQUENCE_1:
-               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_1;
-               break;
-       case DP_TRAINING_PATTERN_SEQUENCE_2:
-               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_2;
-               break;
-       case DP_TRAINING_PATTERN_SEQUENCE_3:
-               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_3;
-               break;
-       case DP_TRAINING_PATTERN_SEQUENCE_4:
-               dpcd_tr_pattern = DPCD_TRAINING_PATTERN_4;
-               break;
-       default:
-               ASSERT(0);
-               DC_LOG_HW_LINK_TRAINING("%s: Invalid HW Training pattern: %d\n",
-                       __func__, pattern);
-               break;
-       }
-
-       return dpcd_tr_pattern;
-}
-
 static uint8_t dc_dp_initialize_scrambling_data_symbols(
        struct dc_link *link,
        enum dc_dp_training_pattern pattern)
@@ -420,20 +442,30 @@ static bool is_cr_done(enum dc_lane_count ln_count,
 }
 
 static bool is_ch_eq_done(enum dc_lane_count ln_count,
-       union lane_status *dpcd_lane_status,
-       union lane_align_status_updated *lane_status_updated)
+               union lane_status *dpcd_lane_status)
 {
+       bool done = true;
        uint32_t lane;
-       if (!lane_status_updated->bits.INTERLANE_ALIGN_DONE)
-               return false;
-       else {
-               for (lane = 0; lane < (uint32_t)(ln_count); lane++) {
-                       if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0 ||
-                               !dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
-                               return false;
-               }
-       }
-       return true;
+       for (lane = 0; lane < (uint32_t)(ln_count); lane++)
+               if (!dpcd_lane_status[lane].bits.CHANNEL_EQ_DONE_0)
+                       done = false;
+       return done;
+}
+
+static bool is_symbol_locked(enum dc_lane_count ln_count,
+               union lane_status *dpcd_lane_status)
+{
+       bool locked = true;
+       uint32_t lane;
+       for (lane = 0; lane < (uint32_t)(ln_count); lane++)
+               if (!dpcd_lane_status[lane].bits.SYMBOL_LOCKED_0)
+                       locked = false;
+       return locked;
+}
+
+static inline bool is_interlane_aligned(union lane_align_status_updated align_status)
+{
+       return align_status.bits.INTERLANE_ALIGN_DONE == 1;
 }
 
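
A standalone sketch, using simplified fields rather than the DPCD unions, of the combined condition the callers below now require: every active lane must report CHANNEL_EQ_DONE and SYMBOL_LOCKED, and the sink must additionally report INTERLANE_ALIGN_DONE.

#include <stdbool.h>
#include <stdio.h>

struct lane_status {
	bool channel_eq_done;
	bool symbol_locked;
};

static bool eq_complete(const struct lane_status *lanes, int lane_count,
			bool interlane_aligned)
{
	int lane;

	for (lane = 0; lane < lane_count; lane++)
		if (!lanes[lane].channel_eq_done || !lanes[lane].symbol_locked)
			return false;

	return interlane_aligned;
}

int main(void)
{
	struct lane_status lanes[4] = {
		{ true, true }, { true, true }, { true, true }, { true, false },
	};

	/* Lane 3 lost symbol lock, so equalization is not complete. */
	printf("%s\n", eq_complete(lanes, 4, true) ? "EQ done" : "EQ not done");
	return 0;
}
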
 static void update_drive_settings(
@@ -835,10 +867,9 @@ static bool perform_post_lt_adj_req_sequence(
                        if (!is_cr_done(lane_count, dpcd_lane_status))
                                return false;
 
-                       if (!is_ch_eq_done(
-                               lane_count,
-                               dpcd_lane_status,
-                               &dpcd_lane_status_updated))
+                       if (!is_ch_eq_done(lane_count, dpcd_lane_status) ||
+                                       !is_symbol_locked(lane_count, dpcd_lane_status) ||
+                                       !is_interlane_aligned(dpcd_lane_status_updated))
                                return false;
 
                        for (lane = 0; lane < (uint32_t)(lane_count); lane++) {
@@ -992,9 +1023,9 @@ static enum link_training_result perform_channel_equalization_sequence(
                        return LINK_TRAINING_EQ_FAIL_CR;
 
                /* 6. check CHEQ done*/
-               if (is_ch_eq_done(lane_count,
-                       dpcd_lane_status,
-                       &dpcd_lane_status_updated))
+               if (is_ch_eq_done(lane_count, dpcd_lane_status) &&
+                               is_symbol_locked(lane_count, dpcd_lane_status) &&
+                               is_interlane_aligned(dpcd_lane_status_updated))
                        return LINK_TRAINING_SUCCESS;
 
                /* 7. update VS/PE/PC2 in lt_settings*/
@@ -1162,7 +1193,7 @@ static inline enum link_training_result perform_link_training_int(
        return status;
 }
 
-static enum link_training_result check_link_loss_status(
+enum link_training_result dp_check_link_loss_status(
        struct dc_link *link,
        const struct link_training_settings *link_training_setting)
 {
@@ -1296,7 +1327,7 @@ static void initialize_training_settings(
                lt_settings->enhanced_framing = 1;
 }
 
-static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
+uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count)
 {
        switch (lttpr_repeater_count) {
        case 0x80: // 1 lttpr repeater
@@ -1365,7 +1396,8 @@ static void configure_lttpr_mode_non_transparent(struct dc_link *link)
                        link->dpcd_caps.lttpr_caps.mode = repeater_mode;
                }
 
-               repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+               repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
                for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
                        aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
                                                ((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
@@ -1555,7 +1587,6 @@ enum link_training_result dc_link_dp_perform_link_training(
 {
        enum link_training_result status = LINK_TRAINING_SUCCESS;
        struct link_training_settings lt_settings;
-       union dpcd_training_pattern dpcd_pattern = { { 0 } };
 
        bool fec_enable;
        uint8_t repeater_cnt;
@@ -1591,7 +1622,7 @@ enum link_training_result dc_link_dp_perform_link_training(
                /* 2. perform link training (set link training done
                 *  to false is done as well)
                 */
-               repeater_cnt = convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+               repeater_cnt = dp_convert_to_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
 
                for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
                                repeater_id--) {
@@ -1621,8 +1652,7 @@ enum link_training_result dc_link_dp_perform_link_training(
        }
 
        /* 3. set training not in progress*/
-       dpcd_pattern.v1_4.TRAINING_PATTERN_SET = DPCD_TRAINING_PATTERN_VIDEOIDLE;
-       dpcd_set_training_pattern(link, dpcd_pattern);
+       dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
        if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) {
                status = perform_link_training_int(link,
                                &lt_settings,
@@ -1634,7 +1664,7 @@ enum link_training_result dc_link_dp_perform_link_training(
         */
        if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
                msleep(5);
-               status = check_link_loss_status(link, &lt_settings);
+               status = dp_check_link_loss_status(link, &lt_settings);
        }
 
        /* 6. print status message*/
@@ -1687,18 +1717,31 @@ bool perform_link_training_with_retries(
        bool skip_video_pattern,
        int attempts,
        struct pipe_ctx *pipe_ctx,
-       enum signal_type signal)
+       enum signal_type signal,
+       bool do_fallback)
 {
        uint8_t j;
        uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY;
        struct dc_stream_state *stream = pipe_ctx->stream;
        struct dc_link *link = stream->link;
        enum dp_panel_mode panel_mode;
+       struct link_encoder *link_enc;
+       enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
+       struct dc_link_settings current_setting = *link_setting;
+
+       /* Dynamically assigned link encoders are associated with the stream rather
+        * than the link.
+        */
+       if (link->dc->res_pool->funcs->link_encs_assign)
+               link_enc = stream->link_enc;
+       else
+               link_enc = link->link_enc;
+       ASSERT(link_enc);
 
        /* We need to do this before the link training to ensure the idle pattern in SST
         * mode will be sent right after the link training
         */
-       link->link_enc->funcs->connect_dig_be_to_fe(link->link_enc,
+       link_enc->funcs->connect_dig_be_to_fe(link_enc,
                                                        pipe_ctx->stream_res.stream_enc->id, true);
 
        for (j = 0; j < attempts; ++j) {
@@ -1710,7 +1753,7 @@ bool perform_link_training_with_retries(
                        link,
                        signal,
                        pipe_ctx->clock_source->id,
-                       link_setting);
+                       &current_setting);
 
                if (stream->sink_patches.dppowerup_delay > 0) {
                        int delay_dp_power_up_in_ms = stream->sink_patches.dppowerup_delay;
@@ -1725,14 +1768,12 @@ bool perform_link_training_with_retries(
                         panel_mode != DP_PANEL_MODE_DEFAULT);
 
                if (link->aux_access_disabled) {
-                       dc_link_dp_perform_link_training_skip_aux(link, link_setting);
+                       dc_link_dp_perform_link_training_skip_aux(link, &current_setting);
                        return true;
                } else {
-                       enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
-
                                status = dc_link_dp_perform_link_training(
                                                                                link,
-                                                                               link_setting,
+                                                                               &current_setting,
                                                                                skip_video_pattern);
                        if (status == LINK_TRAINING_SUCCESS)
                                return true;
@@ -1740,7 +1781,7 @@ bool perform_link_training_with_retries(
 
                /* latest link training still fail, skip delay and keep PHY on
                 */
-               if (j == (attempts - 1))
+               if (j == (attempts - 1) && link->ep_type == DISPLAY_ENDPOINT_PHY)
                        break;
 
                DC_LOG_WARNING("%s: Link training attempt %u of %d failed\n",
@@ -1748,6 +1789,19 @@ bool perform_link_training_with_retries(
 
                dp_disable_link_phy(link, signal);
 
+               /* Abort link training if failure due to sink being unplugged. */
+               if (status == LINK_TRAINING_ABORT)
+                       break;
+               else if (do_fallback) {
+                       decide_fallback_link_setting(*link_setting, &current_setting, status);
+                       /* Fail link training if reduced link bandwidth no longer meets
+                        * stream requirements.
+                        */
+                       if (dc_bandwidth_in_kbps_from_timing(&stream->timing) <
+                                       dc_link_bandwidth_kbps(link, &current_setting))
+                               break;
+               }
+
                msleep(delay_between_attempts);
 
                delay_between_attempts += LINK_TRAINING_RETRY_DELAY;
@@ -2429,6 +2483,12 @@ bool dp_validate_mode_timing(
 
        const struct dc_link_settings *link_setting;
 
+       /* According to spec, VSC SDP should be used if pixel format is YCbCr420 */
+       if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420 &&
+                       !link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
+                       dal_graphics_object_id_get_connector_id(link->link_id) != CONNECTOR_ID_VIRTUAL)
+               return false;
+
        /*always DP fail safe mode*/
        if ((timing->pix_clk_100hz / 10) == (uint32_t) 25175 &&
                timing->h_addressable == (uint32_t) 640 &&
@@ -2611,13 +2671,11 @@ static bool allow_hpd_rx_irq(const struct dc_link *link)
        /*
         * Don't handle RX IRQ unless one of following is met:
         * 1) The link is established (cur_link_settings != unknown)
-        * 2) We kicked off MST detection
-        * 3) We know we're dealing with an active dongle
+        * 2) We know we're dealing with a branch device, SST or MST
         */
 
        if ((link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) ||
-               (link->type == dc_connection_mst_branch) ||
-               is_dp_active_dongle(link))
+               is_dp_branch_device(link))
                return true;
 
        return false;
@@ -2917,6 +2975,22 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
                break;
        }
 
+       switch (dpcd_test_params.bits.CLR_FORMAT) {
+       case 0:
+               pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
+               break;
+       case 1:
+               pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR422;
+               break;
+       case 2:
+               pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_YCBCR444;
+               break;
+       default:
+               pipe_ctx->stream->timing.pixel_encoding = PIXEL_ENCODING_RGB;
+               break;
+       }
+
+
        if (requestColorDepth != COLOR_DEPTH_UNDEFINED
                        && pipe_ctx->stream->timing.display_color_depth != requestColorDepth) {
                DC_LOG_DEBUG("%s: original bpc %d, changing to %d\n",
@@ -2924,9 +2998,10 @@ static void dp_test_send_link_test_pattern(struct dc_link *link)
                                pipe_ctx->stream->timing.display_color_depth,
                                requestColorDepth);
                pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
-               dp_update_dsc_config(pipe_ctx);
        }
 
+       dp_update_dsc_config(pipe_ctx);
+
        dc_link_dp_set_test_pattern(
                        link,
                        test_pattern,
@@ -3182,7 +3257,7 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
                        *out_link_loss = true;
        }
 
-       if (link->type == dc_connection_active_dongle &&
+       if (link->type == dc_connection_sst_branch &&
                hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
                        != link->dpcd_sink_count)
                status = true;
@@ -3232,6 +3307,12 @@ bool is_mst_supported(struct dc_link *link)
 }
 
 bool is_dp_active_dongle(const struct dc_link *link)
+{
+       return (link->dpcd_caps.dongle_type >= DISPLAY_DONGLE_DP_VGA_CONVERTER) &&
+                               (link->dpcd_caps.dongle_type <= DISPLAY_DONGLE_DP_HDMI_CONVERTER);
+}
+
+bool is_dp_branch_device(const struct dc_link *link)
 {
        return link->dpcd_caps.is_branch_dev;
 }
@@ -3593,7 +3674,9 @@ static bool retrieve_link_cap(struct dc_link *link)
                                lttpr_dpcd_data[DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT -
                                                                DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
 
+               /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
                is_lttpr_present = (link->dpcd_caps.lttpr_caps.phy_repeater_cnt > 0 &&
+                               link->dpcd_caps.lttpr_caps.phy_repeater_cnt < 0xff &&
                                link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
                                link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
                                link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
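
The dp_convert_to_count() helper (renamed from convert_to_count() above) decodes the LTTPR PHY_REPEATER_CNT field, and per the comment above the added "< 0xff" check means training falls back to LTTPR transparent mode when the count exceeds eight (encoded as 0xff). A standalone sketch of that decoding, assuming the usual encoding in which 0x80 means one repeater and each right shift adds one; the hunk above only shows the 0x80 case, so the rest of the table here is an assumption.

#include <stdint.h>
#include <stdio.h>

static uint8_t convert_to_count(uint8_t lttpr_repeater_count)
{
	uint8_t count = 0;
	uint8_t code;

	for (code = 0x80; code != 0; code >>= 1) {
		count++;
		if (lttpr_repeater_count == code)
			return count;
	}

	/* 0x00, 0xff and any other value: no usable repeater count. */
	return 0;
}

int main(void)
{
	printf("0x80 -> %u, 0x20 -> %u, 0xff -> %u\n",
	       (unsigned)convert_to_count(0x80),
	       (unsigned)convert_to_count(0x20),
	       (unsigned)convert_to_count(0xff));
	return 0;
}
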
index 1361b87d86d70447540822d464390a42cb13a4cc..1a89d565c92e542e48312c89f9c9e9864d580163 100644 (file)
@@ -112,8 +112,8 @@ static void update_link_enc_assignment(
 
 /* Return first available DIG link encoder. */
 static enum engine_id find_first_avail_link_enc(
-               struct dc_context *ctx,
-               struct dc_state *state)
+               const struct dc_context *ctx,
+               const struct dc_state *state)
 {
        enum engine_id eng_id = ENGINE_ID_UNKNOWN;
        int i;
@@ -270,7 +270,7 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
 
 struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
                struct dc_state *state,
-               struct dc_link *link)
+               const struct dc_link *link)
 {
        struct link_encoder *link_enc = NULL;
        struct display_endpoint_id ep_id;
@@ -296,8 +296,20 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
 
        if (stream_idx != -1)
                link_enc = state->streams[stream_idx]->link_enc;
-       else
-               dm_output_to_console("%s: No link encoder used by link(%d).\n", __func__, link->link_index);
+
+       return link_enc;
+}
+
+struct link_encoder *link_enc_cfg_get_next_avail_link_enc(
+       const struct dc *dc,
+       const struct dc_state *state)
+{
+       struct link_encoder *link_enc = NULL;
+       enum engine_id eng_id = ENGINE_ID_UNKNOWN;
+
+       eng_id = find_first_avail_link_enc(dc->ctx, state);
+       if (eng_id != ENGINE_ID_UNKNOWN)
+               link_enc = dc->res_pool->link_encoders[eng_id - ENGINE_ID_DIGA];
 
        return link_enc;
 }
index b426f878fb99a8079a00ba1b9f5a9cf106d7af89..13c5c4a34a58473d190ab792912ee7ceadc36e32 100644 (file)
@@ -384,7 +384,8 @@ void dp_retrain_link_dp_test(struct dc_link *link,
                                        skip_video_pattern,
                                        LINK_TRAINING_ATTEMPTS,
                                        &pipes[i],
-                                       SIGNAL_TYPE_DISPLAY_PORT);
+                                       SIGNAL_TYPE_DISPLAY_PORT,
+                                       false);
 
                        link->dc->hwss.enable_stream(&pipes[i]);
 
index 8cb937c046aa0d1a2522b061208882303f973b9d..f2bc630a0871a10f5b813b9be14871da4cbd613b 100644 (file)
@@ -1706,12 +1706,6 @@ static bool is_timing_changed(struct dc_stream_state *cur_stream,
        if (cur_stream == NULL)
                return true;
 
-       /* If sink pointer changed, it means this is a hotplug, we should do
-        * full hw setting.
-        */
-       if (cur_stream->sink != new_stream->sink)
-               return true;
-
        /* If output color space is changed, need to reprogram info frames */
        if (cur_stream->output_color_space != new_stream->output_color_space)
                return true;
@@ -2679,6 +2673,7 @@ void dc_resource_state_destruct(struct dc_state *context)
                dc_stream_release(context->streams[i]);
                context->streams[i] = NULL;
        }
+       context->stream_count = 0;
 }
 
 void dc_resource_state_copy_construct(
index 100d434f7a03887434a3fab73d10b46cb92867eb..d26153ab9d62c5d1aaabcb7fae43cd7076785664 100644 (file)
@@ -45,7 +45,7 @@
 /* forward declaration */
 struct aux_payload;
 
-#define DC_VER "3.2.132"
+#define DC_VER "3.2.135.1"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -308,8 +308,6 @@ struct dc_config {
 #endif
        uint64_t vblank_alignment_dto_params;
        uint8_t  vblank_alignment_max_frame_time_diff;
-       bool is_asymmetric_memory;
-       bool is_single_rank_dimm;
 };
 
 enum visual_confirm {
@@ -600,7 +598,6 @@ struct dc_bounding_box_overrides {
        int min_dcfclk_mhz;
 };
 
-struct dc_state;
 struct resource_pool;
 struct dce_hwseq;
 struct gpu_info_soc_bounding_box_v1_0;
@@ -719,7 +716,6 @@ void dc_init_callbacks(struct dc *dc,
 void dc_deinit_callbacks(struct dc *dc);
 void dc_destroy(struct dc **dc);
 
-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream);
 /*******************************************************************************
  * Surface Interfaces
  ******************************************************************************/
index 6b72af2b3f4c34cb62bc666784fd03b51c14223f..c5dc3a947020ad589ba599253f7d90aef90aecdd 100644 (file)
@@ -180,5 +180,5 @@ bool dc_dmub_srv_get_dmub_outbox0_msg(const struct dc *dc, struct dmcub_trace_bu
 
 void dc_dmub_trace_event_control(struct dc *dc, bool enable)
 {
-       dm_helpers_dmub_outbox0_interrupt_control(dc->ctx, enable);
+       dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable);
 }
index cc6fb838420e8c00c7fe20cabd5b88147e55f2ba..fcfde2e04292d5d90cc63ad2c82c3db5a3eb29bf 100644 (file)
@@ -95,6 +95,7 @@ enum dc_dp_training_pattern {
        DP_TRAINING_PATTERN_SEQUENCE_2,
        DP_TRAINING_PATTERN_SEQUENCE_3,
        DP_TRAINING_PATTERN_SEQUENCE_4,
+       DP_TRAINING_PATTERN_VIDEOIDLE,
 };
 
 struct dc_link_settings {
index c51d2d961b7a531c11cc97150ecc3e67c2918ea0..16cc76ce3739d70358e6f5540e343f303a294a0d 100644 (file)
@@ -78,7 +78,8 @@ bool dc_dsc_compute_config(
                const struct dc_crtc_timing *timing,
                struct dc_dsc_config *dsc_cfg);
 
-uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
+uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
+               uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp);
 
 void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
                uint32_t max_target_bpp_limit_override_x16,
@@ -88,6 +89,6 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
 
 void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable);
 
-uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16);
+void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable);
 
 #endif
index bcec019efa6f02fdc49dd665fc98b917a51d854f..04957a9efab2c8fab438b1b6607b8b1efc382802 100644 (file)
@@ -739,6 +739,7 @@ struct dc_dsc_config {
        uint32_t version_minor; /* DSC minor version. Full version is formed as 1.version_minor. */
        bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */
        int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */
+       bool is_dp; /* indicate if DSC is applied based on DP's capability */
 };
 struct dc_crtc_timing {
        uint32_t h_total;
index fc5622ffec3debcc51a29f14d9e51b076530569f..c871923e7db04de15cd94535073a3f07b20230b8 100644 (file)
@@ -36,12 +36,6 @@ enum dc_link_fec_state {
        dc_link_fec_enabled
 };
 
-enum lttpr_mode {
-       LTTPR_MODE_NON_LTTPR,
-       LTTPR_MODE_TRANSPARENT,
-       LTTPR_MODE_NON_TRANSPARENT,
-};
-
 struct dc_link_status {
        bool link_active;
        struct dpcd_caps *dpcd_caps;
@@ -113,6 +107,7 @@ struct dc_link {
        /* TODO: Rename. Flag an endpoint as having a programmable mapping to a
         * DIG encoder. */
        bool is_dig_mapping_flexible;
+       bool hpd_status; /* HPD status of link without physical HPD pin. */
 
        bool edp_sink_present;
 
@@ -363,9 +358,6 @@ bool dc_link_is_hdcp22(struct dc_link *link, enum signal_type signal);
 void dc_link_set_drive_settings(struct dc *dc,
                                struct link_training_settings *lt_settings,
                                const struct dc_link *link);
-void dc_link_perform_link_training(struct dc *dc,
-                                  struct dc_link_settings *link_setting,
-                                  bool skip_video_pattern);
 void dc_link_set_preferred_link_settings(struct dc *dc,
                                         struct dc_link_settings *link_setting,
                                         struct dc_link *link);
index 432754eaf10b8815aa33d26d18689dcafe8a7b3b..535da8db70b6c0416252ecc019a1200e0e234d31 100644 (file)
@@ -404,7 +404,7 @@ enum dc_connection_type {
        dc_connection_none,
        dc_connection_single,
        dc_connection_mst_branch,
-       dc_connection_active_dongle
+       dc_connection_sst_branch
 };
 
 struct dc_csc_adjustments {
@@ -909,6 +909,7 @@ struct dsc_dec_dpcd_caps {
        uint32_t branch_overall_throughput_0_mps; /* In MPs */
        uint32_t branch_overall_throughput_1_mps; /* In MPs */
        uint32_t branch_max_line_width;
+       bool is_dp;
 };
 
 struct dc_golden_table {
index 87d57e81de1207fdf90d15a57bd09c3fecfdad1b..83d97dfe328ff6ad3fee419b66303b5218adb185 100644 (file)
@@ -595,6 +595,25 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
        return res;
 }
 
+int dce_aux_transfer_dmub_raw(struct ddc_service *ddc,
+               struct aux_payload *payload,
+               enum aux_return_code_type *operation_result)
+{
+       struct ddc *ddc_pin = ddc->ddc_pin;
+
+       if (ddc_pin != NULL) {
+               struct dce_aux *aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en];
+               /* XXX: Workaround to configure ddc channels for aux transactions */
+               if (!acquire(aux_engine, ddc_pin)) {
+                       *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+                       return -1;
+               }
+               release_engine(aux_engine);
+       }
+
+       return dm_helper_dmub_aux_transfer_sync(ddc->ctx, ddc->link, payload, operation_result);
+}
+
 #define AUX_MAX_RETRIES 7
 #define AUX_MAX_DEFER_RETRIES 7
 #define AUX_MAX_I2C_DEFER_RETRIES 7
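
dce_aux_transfer_dmub_raw(), added above (its prototype follows in the header diff), briefly acquires and releases the AUX engine as a workaround to get the DDC channel configured, then delegates the actual transaction to dm_helper_dmub_aux_transfer_sync(). Below is a standalone sketch of that acquire/release-then-delegate control flow; the engine, payload and return-code types are invented stand-ins, not the DC ones.

#include <stdbool.h>
#include <stdio.h>

enum ret_code_sketch { RET_OK = 0, RET_ERROR_ENGINE_ACQUIRE };

struct engine_sketch { bool busy; };

/* Stand-ins for acquire()/release_engine(); acquiring is also where the
 * real code ends up configuring the DDC channel for AUX use. */
static bool acquire_sketch(struct engine_sketch *e)
{
	if (e->busy)
		return false;
	e->busy = true;
	return true;
}

static void release_engine_sketch(struct engine_sketch *e)
{
	e->busy = false;
}

/* Stand-in for dm_helper_dmub_aux_transfer_sync(): pretend 4 bytes moved. */
static int delegate_transfer_sync_sketch(enum ret_code_sketch *result)
{
	*result = RET_OK;
	return 4;
}

/* Mirrors the control flow of dce_aux_transfer_dmub_raw(): if a physical
 * DDC pin exists, acquire and immediately release the engine so the channel
 * is configured, then let the DMUB-side helper run the transaction. */
static int transfer_dmub_raw_sketch(struct engine_sketch *e, bool has_pin,
				    enum ret_code_sketch *result)
{
	if (has_pin) {
		if (!acquire_sketch(e)) {
			*result = RET_ERROR_ENGINE_ACQUIRE;
			return -1;
		}
		release_engine_sketch(e);
	}
	return delegate_transfer_sync_sketch(result);
}

int main(void)
{
	struct engine_sketch e = { .busy = false };
	enum ret_code_sketch rc;
	int n = transfer_dmub_raw_sketch(&e, true, &rc);

	printf("transferred %d bytes, rc=%d\n", n, rc);
	return 0;
}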
index 566b1bddd8cc6a95d689f6f54778031f79c68bfb..e69f1899fbf054f6b853fe2a9f9ebb1f2a5c30c9 100644 (file)
@@ -304,6 +304,9 @@ int dce_aux_transfer_raw(struct ddc_service *ddc,
                struct aux_payload *cmd,
                enum aux_return_code_type *operation_result);
 
+int dce_aux_transfer_dmub_raw(struct ddc_service *ddc,
+               struct aux_payload *payload,
+               enum aux_return_code_type *operation_result);
 bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
                struct aux_payload *cmd);
 
index eb1698d54a482f6e61b42d5a6111ef572faaf4c7..6939ca2e82124979232d72a1d9fd0b5e5d401dd3 100644 (file)
@@ -56,11 +56,19 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
 {
        union dmub_rb_cmd cmd;
        uint32_t fractional_pwm = (dc->dc->config.disable_fractional_pwm == false) ? 1 : 0;
+       uint32_t edp_id_count = dc->dc_edp_id_count;
+       int i;
+       uint8_t panel_mask = 0;
+
+       for (i = 0; i < edp_id_count; i++)
+               panel_mask |= 0x01 << i;
 
        memset(&cmd, 0, sizeof(cmd));
        cmd.abm_set_pwm_frac.header.type = DMUB_CMD__ABM;
        cmd.abm_set_pwm_frac.header.sub_type = DMUB_CMD__ABM_SET_PWM_FRAC;
        cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = fractional_pwm;
+       cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+       cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
        cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
 
        dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
@@ -135,11 +143,24 @@ static bool dmub_abm_set_level(struct abm *abm, uint32_t level)
 {
        union dmub_rb_cmd cmd;
        struct dc_context *dc = abm->ctx;
+       struct dc_link *edp_links[MAX_NUM_EDP];
+       int i;
+       int edp_num;
+       uint8_t panel_mask = 0;
+
+       get_edp_links(dc->dc, edp_links, &edp_num);
+
+       for (i = 0; i < edp_num; i++) {
+               if (edp_links[i]->link_status.link_active)
+                       panel_mask |= (0x01 << i);
+       }
 
        memset(&cmd, 0, sizeof(cmd));
        cmd.abm_set_level.header.type = DMUB_CMD__ABM;
        cmd.abm_set_level.header.sub_type = DMUB_CMD__ABM_SET_LEVEL;
        cmd.abm_set_level.abm_set_level_data.level = level;
+       cmd.abm_set_level.abm_set_level_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+       cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
        cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
 
        dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
@@ -155,6 +176,12 @@ static bool dmub_abm_init_config(struct abm *abm,
 {
        union dmub_rb_cmd cmd;
        struct dc_context *dc = abm->ctx;
+       uint32_t edp_id_count = dc->dc_edp_id_count;
+       int i;
+       uint8_t panel_mask = 0;
+
+       for (i = 0; i < edp_id_count; i++)
+               panel_mask |= 0x01 << i;
 
        // TODO: Optimize by only reading back final 4 bytes
        dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb);
@@ -168,6 +195,9 @@ static bool dmub_abm_init_config(struct abm *abm,
        cmd.abm_init_config.header.sub_type = DMUB_CMD__ABM_INIT_CONFIG;
        cmd.abm_init_config.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
        cmd.abm_init_config.abm_init_config_data.bytes = bytes;
+       cmd.abm_init_config.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+       cmd.abm_init_config.abm_init_config_data.panel_mask = panel_mask;
+
        cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
 
        dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
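
For multi-eDP support the ABM DMUB commands now carry a panel_mask: dmub_abm_set_level() sets one bit per eDP link whose link status is active, while the init-config and fractional-PWM paths simply mask in every known eDP id. A small standalone sketch of the active-link mask construction; the link array type is hypothetical.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_NUM_EDP_SKETCH 2	/* stand-in for MAX_NUM_EDP */

struct edp_link_sketch {
	bool link_active;
};

/* One bit per active eDP link, mirroring the loop in dmub_abm_set_level(). */
static uint8_t active_panel_mask(const struct edp_link_sketch *links, int edp_num)
{
	uint8_t mask = 0;
	int i;

	for (i = 0; i < edp_num; i++)
		if (links[i].link_active)
			mask |= 0x01 << i;
	return mask;
}

int main(void)
{
	struct edp_link_sketch links[MAX_NUM_EDP_SKETCH] = {
		{ .link_active = true },
		{ .link_active = false },
	};

	printf("panel_mask = 0x%02x\n",
	       (unsigned int)active_panel_mask(links, MAX_NUM_EDP_SKETCH));
	return 0;
}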
index efa86d5c68470cc6832c9a9360310e129adea27f..98ab4b7769241211b76a45b631ea68ad491fd89a 100644 (file)
@@ -496,10 +496,13 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d
        int vtaps_c = scl_data->taps.v_taps_c;
        int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert);
        int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c);
-       enum lb_memory_config mem_cfg = LB_MEMORY_CONFIG_0;
 
-       if (dpp->base.ctx->dc->debug.use_max_lb)
-               return mem_cfg;
+       if (dpp->base.ctx->dc->debug.use_max_lb) {
+               if (scl_data->format == PIXEL_FORMAT_420BPP8
+                               || scl_data->format == PIXEL_FORMAT_420BPP10)
+                       return LB_MEMORY_CONFIG_3;
+               return LB_MEMORY_CONFIG_0;
+       }
 
        dpp->base.caps->dscl_calc_lb_num_partitions(
                        scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c);
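
The use_max_lb debug path no longer returns LB_MEMORY_CONFIG_0 unconditionally; 4:2:0 surfaces (8- and 10-bit) now pick LB_MEMORY_CONFIG_3. A minimal standalone mapping that captures just that early-return decision; the enum values are illustrative.

#include <stdio.h>

enum pix_fmt_sketch { FMT_ARGB8888, FMT_420BPP8, FMT_420BPP10 };
enum lb_cfg_sketch { LB_CFG_0, LB_CFG_1, LB_CFG_2, LB_CFG_3 };

/* Decision taken early when the use_max_lb debug option is set. */
static enum lb_cfg_sketch max_lb_config(enum pix_fmt_sketch fmt)
{
	if (fmt == FMT_420BPP8 || fmt == FMT_420BPP10)
		return LB_CFG_3;	/* 4:2:0 formats use a different partitioning */
	return LB_CFG_0;
}

int main(void)
{
	printf("420bpp8 -> %d, argb8888 -> %d\n",
	       (int)max_lb_config(FMT_420BPP8), (int)max_lb_config(FMT_ARGB8888));
	return 0;
}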
index 7c939c0a977b31afc904552aca8b018c32aa1840..8c2d3d42d9da26f8c49f98573e4a23857ef79f5c 100644 (file)
@@ -2956,35 +2956,6 @@ void dcn10_program_pipe(
 {
        struct dce_hwseq *hws = dc->hwseq;
 
-       if (pipe_ctx->plane_state->update_flags.bits.full_update)
-               dcn10_enable_plane(dc, pipe_ctx, context);
-
-       dcn10_update_dchubp_dpp(dc, pipe_ctx, context);
-
-       hws->funcs.set_hdr_multiplier(pipe_ctx);
-
-       if (pipe_ctx->plane_state->update_flags.bits.full_update ||
-                       pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
-                       pipe_ctx->plane_state->update_flags.bits.gamma_change)
-               hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
-
-       /* dcn10_translate_regamma_to_hw_format takes 750us to finish
-        * only do gamma programming for full update.
-        * TODO: This can be further optimized/cleaned up
-        * Always call this for now since it does memcmp inside before
-        * doing heavy calculation and programming
-        */
-       if (pipe_ctx->plane_state->update_flags.bits.full_update)
-               hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
-}
-
-static void dcn10_program_all_pipe_in_tree(
-               struct dc *dc,
-               struct pipe_ctx *pipe_ctx,
-               struct dc_state *context)
-{
-       struct dce_hwseq *hws = dc->hwseq;
-
        if (pipe_ctx->top_pipe == NULL) {
                bool blank = !is_pipe_tree_visible(pipe_ctx);
 
@@ -3004,35 +2975,26 @@ static void dcn10_program_all_pipe_in_tree(
                hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
        }
 
-       if (pipe_ctx->plane_state != NULL)
-               hws->funcs.program_pipe(dc, pipe_ctx, context);
-
-       if (pipe_ctx->bottom_pipe != NULL && pipe_ctx->bottom_pipe != pipe_ctx)
-               dcn10_program_all_pipe_in_tree(dc, pipe_ctx->bottom_pipe, context);
-}
-
-static struct pipe_ctx *dcn10_find_top_pipe_for_stream(
-               struct dc *dc,
-               struct dc_state *context,
-               const struct dc_stream_state *stream)
-{
-       int i;
+       if (pipe_ctx->plane_state->update_flags.bits.full_update)
+               dcn10_enable_plane(dc, pipe_ctx, context);
 
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-               struct pipe_ctx *old_pipe_ctx =
-                               &dc->current_state->res_ctx.pipe_ctx[i];
+       dcn10_update_dchubp_dpp(dc, pipe_ctx, context);
 
-               if (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state)
-                       continue;
+       hws->funcs.set_hdr_multiplier(pipe_ctx);
 
-               if (pipe_ctx->stream != stream)
-                       continue;
+       if (pipe_ctx->plane_state->update_flags.bits.full_update ||
+                       pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+                       pipe_ctx->plane_state->update_flags.bits.gamma_change)
+               hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
 
-               if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
-                       return pipe_ctx;
-       }
-       return NULL;
+       /* dcn10_translate_regamma_to_hw_format takes 750us to finish
+        * only do gamma programming for full update.
+        * TODO: This can be further optimized/cleaned up
+        * Always call this for now since it does memcmp inside before
+        * doing heavy calculation and programming
+        */
+       if (pipe_ctx->plane_state->update_flags.bits.full_update)
+               hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
 }
 
 void dcn10_wait_for_pending_cleared(struct dc *dc,
@@ -3067,91 +3029,6 @@ void dcn10_wait_for_pending_cleared(struct dc *dc,
                }
 }
 
-void dcn10_apply_ctx_for_surface(
-               struct dc *dc,
-               const struct dc_stream_state *stream,
-               int num_planes,
-               struct dc_state *context)
-{
-       struct dce_hwseq *hws = dc->hwseq;
-       int i;
-       struct timing_generator *tg;
-       uint32_t underflow_check_delay_us;
-       bool interdependent_update = false;
-       struct pipe_ctx *top_pipe_to_program =
-                       dcn10_find_top_pipe_for_stream(dc, context, stream);
-       DC_LOGGER_INIT(dc->ctx->logger);
-
-       // Clear pipe_ctx flag
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-               pipe_ctx->update_flags.raw = 0;
-       }
-
-       if (!top_pipe_to_program)
-               return;
-
-       tg = top_pipe_to_program->stream_res.tg;
-
-       interdependent_update = top_pipe_to_program->plane_state &&
-               top_pipe_to_program->plane_state->update_flags.bits.full_update;
-
-       underflow_check_delay_us = dc->debug.underflow_assert_delay_us;
-
-       if (underflow_check_delay_us != 0xFFFFFFFF && hws->funcs.did_underflow_occur)
-               ASSERT(hws->funcs.did_underflow_occur(dc, top_pipe_to_program));
-
-       if (underflow_check_delay_us != 0xFFFFFFFF)
-               udelay(underflow_check_delay_us);
-
-       if (underflow_check_delay_us != 0xFFFFFFFF && hws->funcs.did_underflow_occur)
-               ASSERT(hws->funcs.did_underflow_occur(dc, top_pipe_to_program));
-
-       if (num_planes == 0) {
-               /* OTG blank before remove all front end */
-               hws->funcs.blank_pixel_data(dc, top_pipe_to_program, true);
-       }
-
-       /* Disconnect unused mpcc */
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-               struct pipe_ctx *old_pipe_ctx =
-                               &dc->current_state->res_ctx.pipe_ctx[i];
-
-               if ((!pipe_ctx->plane_state ||
-                    pipe_ctx->stream_res.tg != old_pipe_ctx->stream_res.tg) &&
-                   old_pipe_ctx->plane_state &&
-                   old_pipe_ctx->stream_res.tg == tg) {
-
-                       hws->funcs.plane_atomic_disconnect(dc, old_pipe_ctx);
-                       pipe_ctx->update_flags.bits.disable = 1;
-
-                       DC_LOG_DC("Reset mpcc for pipe %d\n",
-                                       old_pipe_ctx->pipe_idx);
-               }
-       }
-
-       if (num_planes > 0)
-               dcn10_program_all_pipe_in_tree(dc, top_pipe_to_program, context);
-
-       /* Program secondary blending tree and writeback pipes */
-       if ((stream->num_wb_info > 0) && (hws->funcs.program_all_writeback_pipes_in_tree))
-               hws->funcs.program_all_writeback_pipes_in_tree(dc, stream, context);
-       if (interdependent_update)
-               for (i = 0; i < dc->res_pool->pipe_count; i++) {
-                       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-                       /* Skip inactive pipes and ones already updated */
-                       if (!pipe_ctx->stream || pipe_ctx->stream == stream ||
-                           !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg))
-                               continue;
-
-                       pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent(
-                               pipe_ctx->plane_res.hubp,
-                               &pipe_ctx->dlg_regs,
-                               &pipe_ctx->ttu_regs);
-               }
-}
-
 void dcn10_post_unlock_program_front_end(
                struct dc *dc,
                struct dc_state *context)
index 37bec421fde8e34c70d32cb986246193c79ea666..c9bdffe5989bc0e6552392b9fcb70d8f3dba68df 100644 (file)
@@ -80,11 +80,6 @@ void dcn10_lock_all_pipes(
                struct dc *dc,
                struct dc_state *context,
                bool lock);
-void dcn10_apply_ctx_for_surface(
-               struct dc *dc,
-               const struct dc_stream_state *stream,
-               int num_planes,
-               struct dc_state *context);
 void dcn10_post_unlock_program_front_end(
                struct dc *dc,
                struct dc_state *context);
index d532c78ee76472087fcdb37475b9053b0305f1ed..680ca53455a2e2cf127fb0e4b746128bf4102023 100644 (file)
 #include "hw_sequencer_private.h"
 #include "dce110/dce110_hw_sequencer.h"
 #include "dcn10_hw_sequencer.h"
+#include "dcn20/dcn20_hwseq.h"
 
 static const struct hw_sequencer_funcs dcn10_funcs = {
        .program_gamut_remap = dcn10_program_gamut_remap,
        .init_hw = dcn10_init_hw,
        .power_down_on_boot = dcn10_power_down_on_boot,
        .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
-       .apply_ctx_for_surface = dcn10_apply_ctx_for_surface,
+       .apply_ctx_for_surface = NULL,
+       .program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
        .post_unlock_program_front_end = dcn10_post_unlock_program_front_end,
        .wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
        .update_plane_addr = dcn10_update_plane_addr,
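
dcn10 drops its private apply_ctx_for_surface path: the hook is set to NULL and the shared dcn20_program_front_end_for_ctx is wired in instead, which is what allows dcn10_apply_ctx_for_surface(), dcn10_program_all_pipe_in_tree() and dcn10_find_top_pipe_for_stream() to be deleted above. A tiny sketch of that ops-table shape, with an optional legacy hook and a shared replacement; the struct and the caller are invented for illustration.

#include <stdio.h>

struct ctx_sketch { int id; };

/* Invented ops table shaped loosely like hw_sequencer_funcs. */
struct hwseq_ops_sketch {
	void (*apply_ctx_for_surface)(struct ctx_sketch *ctx);		/* legacy, may be NULL */
	void (*program_front_end_for_ctx)(struct ctx_sketch *ctx);	/* shared replacement */
};

static void shared_program_front_end(struct ctx_sketch *ctx)
{
	printf("front end programmed for ctx %d\n", ctx->id);
}

/* Invented caller: prefer the new hook, fall back to the legacy one if set. */
static void program_hw(const struct hwseq_ops_sketch *ops, struct ctx_sketch *ctx)
{
	if (ops->program_front_end_for_ctx)
		ops->program_front_end_for_ctx(ctx);
	else if (ops->apply_ctx_for_surface)
		ops->apply_ctx_for_surface(ctx);
}

int main(void)
{
	const struct hwseq_ops_sketch dcn10_like = {
		.apply_ctx_for_surface = NULL,
		.program_front_end_for_ctx = shared_program_front_end,
	};
	struct ctx_sketch ctx = { .id = 0 };

	program_hw(&dcn10_like, &ctx);
	return 0;
}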
index 8dc3d1f7398422e49bc7823e3c5bad56ab111673..2feb051a200294a0d88ec78616a68d2598117b58 100644 (file)
@@ -482,7 +482,7 @@ bool dpp20_program_blnd_lut(
                next_mode = LUT_RAM_A;
 
        dpp20_power_on_blnd_lut(dpp_base, true);
-       dpp20_configure_blnd_lut(dpp_base, next_mode == LUT_RAM_A ? true:false);
+       dpp20_configure_blnd_lut(dpp_base, next_mode == LUT_RAM_A);
 
        if (next_mode == LUT_RAM_A)
                dpp20_program_blnd_luta_settings(dpp_base, params);
@@ -893,7 +893,7 @@ bool dpp20_program_shaper(
        else
                next_mode = LUT_RAM_A;
 
-       dpp20_configure_shaper_lut(dpp_base, next_mode == LUT_RAM_A ? true:false);
+       dpp20_configure_shaper_lut(dpp_base, next_mode == LUT_RAM_A);
 
        if (next_mode == LUT_RAM_A)
                dpp20_program_shaper_luta_settings(dpp_base, params);
index 6a10daec15ccd03b096755f3cee40eccf12edf45..484a30592987f26733ffc2466227cbcaa20472d6 100644 (file)
@@ -1700,7 +1700,11 @@ void dcn20_program_front_end_for_ctx(
 
                if (pipe->plane_state && !pipe->top_pipe) {
                        while (pipe) {
-                               dcn20_program_pipe(dc, pipe, context);
+                               if (hws->funcs.program_pipe)
+                                       hws->funcs.program_pipe(dc, pipe, context);
+                               else
+                                       dcn20_program_pipe(dc, pipe, context);
+
                                pipe = pipe->bottom_pipe;
                        }
                        /* Program secondary blending tree and writeback pipes */
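
dcn20_program_front_end_for_ctx() now dispatches through hws->funcs.program_pipe when an ASIC-specific implementation is registered and falls back to dcn20_program_pipe() otherwise, which is what lets dcn10 reuse this front-end path with its own per-pipe programming. A compact sketch of walking a bottom_pipe chain with that optional override; the pipe list below is a plain stand-in.

#include <stdio.h>

struct pipe_sketch {
	int idx;
	struct pipe_sketch *bottom_pipe;
};

struct hwseq_funcs_sketch {
	void (*program_pipe)(struct pipe_sketch *pipe);	/* optional ASIC override */
};

static void default_program_pipe(struct pipe_sketch *pipe)
{
	printf("default programming for pipe %d\n", pipe->idx);
}

/* Walk the blending chain top-down, preferring the registered hook. */
static void program_front_end_sketch(const struct hwseq_funcs_sketch *funcs,
				     struct pipe_sketch *top)
{
	struct pipe_sketch *pipe;

	for (pipe = top; pipe; pipe = pipe->bottom_pipe) {
		if (funcs->program_pipe)
			funcs->program_pipe(pipe);
		else
			default_program_pipe(pipe);
	}
}

int main(void)
{
	struct pipe_sketch bottom = { .idx = 1, .bottom_pipe = NULL };
	struct pipe_sketch top = { .idx = 0, .bottom_pipe = &bottom };
	const struct hwseq_funcs_sketch funcs = { .program_pipe = NULL };

	program_front_end_sketch(&funcs, &top);
	return 0;
}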
index 527e56c353cb77514957fbaec024f23315e98cbf..6a56a03cfba3d68d950a624cf3c39069a13a7570 100644 (file)
@@ -3667,7 +3667,7 @@ static bool dcn20_resource_construct(
        int i;
        struct dc_context *ctx = dc->ctx;
        struct irq_service_init_data init_data;
-       struct ddc_service_init_data ddc_init_data;
+       struct ddc_service_init_data ddc_init_data = {0};
        struct _vcs_dpi_soc_bounding_box_st *loaded_bb =
                        get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev);
        struct _vcs_dpi_ip_params_st *loaded_ip =
index 8fccee5a3036f2de45e4180884a15e4be0f4885e..69cc192a7e7191bc2d3bccb97a4df62ed4caed99 100644 (file)
@@ -218,7 +218,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
        cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
        cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp;
        cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16;
-       cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1;
+       cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
        cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_cntl->inst);
        cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
 
index 8e3f1d0b4cc3ca995f867643d55fc5485ba9f337..38a2aa87f5f5ecf915610fbda507aeb6651d3e3c 100644 (file)
@@ -1575,10 +1575,12 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li
        low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz;
        low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
 
-       for (i = clk_table->num_entries; i > 1; i--)
-               clk_table->entries[i] = clk_table->entries[i-1];
-       clk_table->entries[1] = clk_table->entries[0];
-       clk_table->num_entries++;
+       if (clk_table->num_entries < MAX_NUM_DPM_LVL) {
+               for (i = clk_table->num_entries; i > 1; i--)
+                       clk_table->entries[i] = clk_table->entries[i-1];
+               clk_table->entries[1] = clk_table->entries[0];
+               clk_table->num_entries++;
+       }
 
        return low_pstate_lvl;
 }
@@ -1610,10 +1612,6 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
                        }
                }
 
-               /* clk_table[1] is reserved for min DF PState.  skip here to fill in later. */
-               if (i == 1)
-                       k++;
-
                clock_limits[k].state = k;
                clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
                clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
@@ -1630,14 +1628,25 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 
                k++;
        }
-       for (i = 0; i < clk_table->num_entries + 1; i++)
-               dcn2_1_soc.clock_limits[i] = clock_limits[i];
+
+       if (clk_table->num_entries >= MAX_NUM_DPM_LVL) {
+               for (i = 0; i < clk_table->num_entries + 1; i++)
+                       dcn2_1_soc.clock_limits[i] = clock_limits[i];
+       } else {
+               dcn2_1_soc.clock_limits[0] = clock_limits[0];
+               for (i = 2; i < clk_table->num_entries + 1; i++) {
+                       dcn2_1_soc.clock_limits[i] = clock_limits[i - 1];
+                       dcn2_1_soc.clock_limits[i].state = i;
+               }
+       }
+
        if (clk_table->num_entries) {
-               dcn2_1_soc.num_states = clk_table->num_entries + 1;
                /* fill in min DF PState */
                dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl);
+               dcn2_1_soc.num_states = clk_table->num_entries;
                /* duplicate last level */
-               dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
+               dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] =
+                       dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1];
                dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states;
        }
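
construct_low_pstate_lvl() now only shifts the clock table up to make room for the reserved min-DF-PState entry at index 1 when the table still has headroom (num_entries < MAX_NUM_DPM_LVL), and update_bw_bounding_box() stops skipping index 1 while filling and instead copies levels around the reserved slot afterwards. A standalone sketch of the guarded shift; the array size and entry type are illustrative.

#include <stdio.h>

#define MAX_LVL_SKETCH 8	/* stand-in for MAX_NUM_DPM_LVL */

struct clk_entry_sketch { int fclk_mhz; };

struct clk_table_sketch {
	struct clk_entry_sketch entries[MAX_LVL_SKETCH];
	int num_entries;
};

/*
 * Shift entries[1..] up by one so index 1 can be reused for the reserved
 * min-DF-PState level, but only while the table still has headroom.
 */
static void reserve_low_pstate_slot(struct clk_table_sketch *t)
{
	int i;

	if (t->num_entries >= MAX_LVL_SKETCH)
		return;	/* table already full: leave the existing levels alone */

	for (i = t->num_entries; i > 1; i--)
		t->entries[i] = t->entries[i - 1];
	t->entries[1] = t->entries[0];	/* placeholder, later overwritten */
	t->num_entries++;
}

int main(void)
{
	struct clk_table_sketch t = {
		.entries = { { 400 }, { 800 }, { 1200 } },
		.num_entries = 3,
	};
	int i;

	reserve_low_pstate_slot(&t);
	for (i = 0; i < t.num_entries; i++)
		printf("level %d: %d MHz\n", i, t.entries[i].fclk_mhz);
	return 0;
}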
 
index 0b1755f1dea81326ab24cd23e9a9e6e92ef75ffb..9566b9037458854dd4acc1eeac1162963ddc8545 100644 (file)
@@ -85,7 +85,9 @@
        SRI(DP_MSE_RATE_UPDATE, DP, id), \
        SRI(DP_PIXEL_FORMAT, DP, id), \
        SRI(DP_SEC_CNTL, DP, id), \
+       SRI(DP_SEC_CNTL1, DP, id), \
        SRI(DP_SEC_CNTL2, DP, id), \
+       SRI(DP_SEC_CNTL5, DP, id), \
        SRI(DP_SEC_CNTL6, DP, id), \
        SRI(DP_STEER_FIFO, DP, id), \
        SRI(DP_VID_M, DP, id), \
        SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
        SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
        SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+       SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
        SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+       SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
        SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
        SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
        SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
index 910c17fd42789326500d5dc979b39402d8253fa8..950c9bfd53de516038648cb4a1df8169d1390d40 100644 (file)
@@ -874,7 +874,7 @@ bool mpc3_program_shaper(
        else
                next_mode = LUT_RAM_A;
 
-       mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A ? true:false, rmu_idx);
+       mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, rmu_idx);
 
        if (next_mode == LUT_RAM_A)
                mpc3_program_shaper_luta_settings(mpc, params, rmu_idx);
index 8980c90b2277a242ea8c59a9481fdddb8927ba7d..ac478bdcfb2ade75aa51caeea61066d8c1dd08cb 100644 (file)
@@ -97,7 +97,7 @@ void optc3_lock_doublebuffer_disable(struct timing_generator *optc)
                MASTER_UPDATE_LOCK_DB_END_Y, 0);
 
        REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0);
-       REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 1);
+       REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0);
 }
 
 void optc3_lock(struct timing_generator *optc)
index 4a5fa23d8e7b06911d13ed22c6e197d74df2b8f8..45f96221a094b491fb0e59edaae2d5f1d45f5538 100644 (file)
@@ -2538,7 +2538,7 @@ static bool dcn30_resource_construct(
        int i;
        struct dc_context *ctx = dc->ctx;
        struct irq_service_init_data init_data;
-       struct ddc_service_init_data ddc_init_data;
+       struct ddc_service_init_data ddc_init_data = {0};
        uint32_t pipe_fuses = read_pipe_fuses(ctx);
        uint32_t num_pipes = 0;
 
index 7617fab9e1f95dcb8232a2ddd9afd86d4c4c75c0..304d50d16d01cbf32b994a197351c76da0e292b6 100644 (file)
@@ -34,6 +34,8 @@
 #include "dc.h"
 
 struct dp_mst_stream_allocation_table;
+struct aux_payload;
+enum aux_return_code_type;
 
 /*
  * Allocate memory accessible by the GPU
@@ -158,6 +160,11 @@ void dm_set_dcn_clocks(
                struct dc_context *ctx,
                struct dc_clocks *clks);
 
-bool dm_helpers_dmub_outbox0_interrupt_control(struct dc_context *ctx, bool enable);
+bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable);
 
+int dm_helper_dmub_aux_transfer_sync(
+               struct dc_context *ctx,
+               const struct dc_link *link,
+               struct aux_payload *payload,
+               enum aux_return_code_type *operation_result);
 #endif /* __DM_HELPERS__ */
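
The new dm_helpers prototypes only need struct aux_payload and enum aux_return_code_type as pointer types, so the header gets forward declarations rather than extra includes. A minimal standalone illustration of that pattern using an invented struct (a struct rather than an enum, since ISO C only guarantees incomplete types for structs; the forward enum declaration above is something GCC accepts as an extension).

#include <stdio.h>

/* "Header" part: the type is only used through pointers here, so an
 * incomplete (forward) declaration is enough. */
struct payload_sketch;
int transfer_sync_sketch(struct payload_sketch *p);

/* "Implementation" part: the full definition lives with the code that
 * actually dereferences it. */
struct payload_sketch {
	unsigned int address;
	unsigned int length;
};

int transfer_sync_sketch(struct payload_sketch *p)
{
	return (int)p->length;
}

int main(void)
{
	struct payload_sketch p = { .address = 0x30, .length = 16 };

	printf("transferred %d bytes\n", transfer_sync_sketch(&p));
	return 0;
}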
index 9729cf292e8494a848bfeba92e180a3e9110b943..d3b5b6fedf042f21fa404396cf64a3a306e2a26f 100644 (file)
@@ -2895,7 +2895,7 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                        RoundedUpMaxSwathSizeBytesC = 0.0;
 
                if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
-                               <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
+                               <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
                        mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
                        mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
                } else {
@@ -2904,17 +2904,17 @@ static void dml20_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                }
 
                if (mode_lib->vba.SwathHeightC[k] == 0) {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024;
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] * 1024;
                        mode_lib->vba.DETBufferSizeC[k] = 0;
                } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 2;
-                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 2;
                } else {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 * 2 / 3;
-                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 3;
                }
        }
@@ -3819,7 +3819,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                mode_lib->vba.MaximumSwathWidthInDETBuffer =
                                dml_min(
                                                mode_lib->vba.MaximumSwathWidthSupport,
-                                               mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
+                                               mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0
                                                                / (locals->BytePerPixelInDETY[k]
                                                                                * locals->MinSwathHeightY[k]
                                                                                + locals->BytePerPixelInDETC[k]
@@ -4322,7 +4322,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        locals->RoundedUpMaxSwathSizeBytesC = 0;
                                }
 
-                               if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) {
+                               if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
                                        locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
                                        locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
                                } else {
@@ -4331,15 +4331,15 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                }
 
                                if (locals->BytePerPixelInDETC[k] == 0) {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
                                        locals->LinesInDETChroma = 0;
                                } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] /
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETY[k] /
                                                        locals->SwathWidthYPerState[i][j][k];
-                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
+                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
                                } else {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
-                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
+                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
                                }
 
                                locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines,
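
DETBufferSizeInKByte becomes an array throughout the DML code, hence the [0] indexing here and in the dml20v2/dml21/dml30 hunks that follow; the luma/chroma split of the DET buffer itself is unchanged: all of it to luma when there is no chroma swath, an even split when the luma swath is not taller, otherwise 2/3 luma and 1/3 chroma. A standalone sketch of that split using unsigned byte counts, as the dml21 changes below switch to.

#include <stdio.h>

/*
 * Split a DET allocation (given in KiB) between luma and chroma, following
 * the three cases in dml20_DisplayPipeConfiguration() above: luma-only,
 * even split, or 2/3 luma and 1/3 chroma.
 */
static void det_buffer_split_sketch(unsigned int det_kbyte,
				    unsigned int swath_height_y,
				    unsigned int swath_height_c,
				    unsigned int *det_bytes_y,
				    unsigned int *det_bytes_c)
{
	if (swath_height_c == 0) {
		*det_bytes_y = det_kbyte * 1024;
		*det_bytes_c = 0;
	} else if (swath_height_y <= swath_height_c) {
		*det_bytes_y = det_kbyte * 1024 / 2;
		*det_bytes_c = det_kbyte * 1024 / 2;
	} else {
		*det_bytes_y = det_kbyte * 1024 * 2 / 3;
		*det_bytes_c = det_kbyte * 1024 / 3;
	}
}

int main(void)
{
	unsigned int y, c;

	det_buffer_split_sketch(164, 8, 4, &y, &c);	/* taller luma swath */
	printf("luma %u bytes, chroma %u bytes\n", y, c);
	return 0;
}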
index 51098c2c9854867ad4d8a6460051a64f887831d2..fbed5304692d230bf93d34bcd737746a87ddb7bc 100644 (file)
@@ -2968,7 +2968,7 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                        RoundedUpMaxSwathSizeBytesC = 0.0;
 
                if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
-                               <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
+                               <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
                        mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
                        mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
                } else {
@@ -2977,17 +2977,17 @@ static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                }
 
                if (mode_lib->vba.SwathHeightC[k] == 0) {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024;
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0] * 1024;
                        mode_lib->vba.DETBufferSizeC[k] = 0;
                } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 2;
-                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 2;
                } else {
-                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 * 2 / 3;
-                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte
+                       mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte[0]
                                        * 1024.0 / 3;
                }
        }
@@ -3926,7 +3926,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
                mode_lib->vba.MaximumSwathWidthInDETBuffer =
                                dml_min(
                                                mode_lib->vba.MaximumSwathWidthSupport,
-                                               mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
+                                               mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0
                                                                / (locals->BytePerPixelInDETY[k]
                                                                                * locals->MinSwathHeightY[k]
                                                                                + locals->BytePerPixelInDETC[k]
@@ -4443,7 +4443,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
                                        locals->RoundedUpMaxSwathSizeBytesC = 0;
                                }
 
-                               if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) {
+                               if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
                                        locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
                                        locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
                                } else {
@@ -4452,15 +4452,15 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
                                }
 
                                if (locals->BytePerPixelInDETC[k] == 0) {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
                                        locals->LinesInDETChroma = 0;
                                } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] /
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETY[k] /
                                                        locals->SwathWidthYPerState[i][j][k];
-                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
+                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
                                } else {
-                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
-                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
+                                       locals->LinesInDETLuma = locals->DETBufferSizeInKByte[0] * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k];
+                                       locals->LinesInDETChroma = locals->DETBufferSizeInKByte[0] * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2);
                                }
 
                                locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines,
index 398210d1af34fcb8eaf3270833d71bfb30ba30f0..c26e742e81377384fe8a1533083871f3e28e59e6 100644 (file)
@@ -148,7 +148,7 @@ static double CalculateDCCConfiguration(
                bool                 DCCProgrammingAssumesScanDirectionUnknown,
                unsigned int         ViewportWidth,
                unsigned int         ViewportHeight,
-               double               DETBufferSize,
+               unsigned int         DETBufferSize,
                unsigned int         RequestHeight256Byte,
                unsigned int         SwathHeight,
                enum dm_swizzle_mode TilingFormat,
@@ -289,7 +289,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int MaxLineBufferLines,
                unsigned int LineBufferSize,
                unsigned int DPPOutputBufferPixels,
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int WritebackInterfaceLumaBufferSize,
                unsigned int WritebackInterfaceChromaBufferSize,
                double DCFCLK,
@@ -354,11 +354,11 @@ static void CalculateDCFCLKDeepSleep(
                double DPPCLK[],
                double *DCFCLKDeepSleep);
 static void CalculateDETBufferSize(
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int SwathHeightY,
                unsigned int SwathHeightC,
-               double *DETBufferSizeY,
-               double *DETBufferSizeC);
+               unsigned int *DETBufferSizeY,
+               unsigned int *DETBufferSizeC);
 static void CalculateUrgentBurstFactor(
                unsigned int DETBufferSizeInKByte,
                unsigned int SwathHeightY,
@@ -1074,7 +1074,7 @@ static double CalculateDCCConfiguration(
                bool DCCProgrammingAssumesScanDirectionUnknown,
                unsigned int ViewportWidth,
                unsigned int ViewportHeight,
-               double DETBufferSize,
+               unsigned int DETBufferSize,
                unsigned int RequestHeight256Byte,
                unsigned int SwathHeight,
                enum dm_swizzle_mode TilingFormat,
@@ -2246,7 +2246,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        }
 
                        CalculateUrgentBurstFactor(
-                                       mode_lib->vba.DETBufferSizeInKByte,
+                                       mode_lib->vba.DETBufferSizeInKByte[0],
                                        mode_lib->vba.SwathHeightY[k],
                                        mode_lib->vba.SwathHeightC[k],
                                        locals->SwathWidthY[k],
@@ -2415,7 +2415,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                                mode_lib->vba.MaxLineBufferLines,
                                mode_lib->vba.LineBufferSize,
                                mode_lib->vba.DPPOutputBufferPixels,
-                               mode_lib->vba.DETBufferSizeInKByte,
+                               mode_lib->vba.DETBufferSizeInKByte[0],
                                mode_lib->vba.WritebackInterfaceLumaBufferSize,
                                mode_lib->vba.WritebackInterfaceChromaBufferSize,
                                mode_lib->vba.DCFCLK,
@@ -2588,7 +2588,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
                        mode_lib->vba.ViewportWidth[k],
                        mode_lib->vba.ViewportHeight[k],
-                       mode_lib->vba.DETBufferSizeInKByte * 1024,
+                       mode_lib->vba.DETBufferSizeInKByte[0] * 1024,
                        locals->BlockHeight256BytesY[k],
                        mode_lib->vba.SwathHeightY[k],
                        mode_lib->vba.SurfaceTiling[k],
@@ -2689,13 +2689,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
        // Stutter Efficiency
        for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
                CalculateDETBufferSize(
-                       mode_lib->vba.DETBufferSizeInKByte,
+                       mode_lib->vba.DETBufferSizeInKByte[0],
                        mode_lib->vba.SwathHeightY[k],
                        mode_lib->vba.SwathHeightC[k],
                        &locals->DETBufferSizeY[k],
                        &locals->DETBufferSizeC[k]);
 
-               locals->LinesInDETY[k] = locals->DETBufferSizeY[k]
+               locals->LinesInDETY[k] = (double)locals->DETBufferSizeY[k]
                                / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
                locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
                                locals->LinesInDETY[k],
@@ -2984,7 +2984,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                        RoundedUpMaxSwathSizeBytesC = 0.0;
 
                if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
-                               <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
+                               <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
                        mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
                        mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
                } else {
@@ -2993,7 +2993,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
                }
 
                CalculateDETBufferSize(
-                               mode_lib->vba.DETBufferSizeInKByte,
+                               mode_lib->vba.DETBufferSizeInKByte[0],
                                mode_lib->vba.SwathHeightY[k],
                                mode_lib->vba.SwathHeightC[k],
                                &mode_lib->vba.DETBufferSizeY[k],
@@ -3888,7 +3888,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                mode_lib->vba.MaximumSwathWidthInDETBuffer =
                                dml_min(
                                                mode_lib->vba.MaximumSwathWidthSupport,
-                                               mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0
+                                               mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0
                                                                / (locals->BytePerPixelInDETY[k]
                                                                                * locals->MinSwathHeightY[k]
                                                                                + locals->BytePerPixelInDETC[k]
@@ -4437,7 +4437,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
                                }
                                if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
-                                               <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) {
+                                               <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
                                        locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
                                        locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
                                } else {
@@ -4801,7 +4801,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        }
 
                                        CalculateUrgentBurstFactor(
-                                                       mode_lib->vba.DETBufferSizeInKByte,
+                                                       mode_lib->vba.DETBufferSizeInKByte[0],
                                                        locals->SwathHeightYThisState[k],
                                                        locals->SwathHeightCThisState[k],
                                                        locals->SwathWidthYThisState[k],
@@ -4975,7 +4975,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        mode_lib->vba.MaxLineBufferLines,
                                        mode_lib->vba.LineBufferSize,
                                        mode_lib->vba.DPPOutputBufferPixels,
-                                       mode_lib->vba.DETBufferSizeInKByte,
+                                       mode_lib->vba.DETBufferSizeInKByte[0],
                                        mode_lib->vba.WritebackInterfaceLumaBufferSize,
                                        mode_lib->vba.WritebackInterfaceChromaBufferSize,
                                        mode_lib->vba.DCFCLKPerState[i],
@@ -5230,7 +5230,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int MaxLineBufferLines,
                unsigned int LineBufferSize,
                unsigned int DPPOutputBufferPixels,
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int WritebackInterfaceLumaBufferSize,
                unsigned int WritebackInterfaceChromaBufferSize,
                double DCFCLK,
@@ -5285,8 +5285,8 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
        double EffectiveLBLatencyHidingC;
        double DPPOutputBufferLinesY;
        double DPPOutputBufferLinesC;
-       double DETBufferSizeY;
-       double DETBufferSizeC;
+       unsigned int DETBufferSizeY;
+       unsigned int DETBufferSizeC;
        double LinesInDETY[DC__NUM_DPP__MAX];
        double LinesInDETC;
        unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
@@ -5382,12 +5382,12 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                                &DETBufferSizeY,
                                &DETBufferSizeC);
 
-               LinesInDETY[k] = DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
+               LinesInDETY[k] = (double)DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
                LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
                FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
                                * (HTotal[k] / PixelClock[k]) / VRatio[k];
                if (BytePerPixelDETC[k] > 0) {
-                       LinesInDETC = DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
+                       LinesInDETC = (double)DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
                        LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
                        FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
                                        * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
@@ -5574,11 +5574,11 @@ static void CalculateDCFCLKDeepSleep(
 }
 
 static void CalculateDETBufferSize(
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int SwathHeightY,
                unsigned int SwathHeightC,
-               double *DETBufferSizeY,
-               double *DETBufferSizeC)
+               unsigned int *DETBufferSizeY,
+               unsigned int *DETBufferSizeC)
 {
        if (SwathHeightC == 0) {
                *DETBufferSizeY = DETBufferSizeInKByte * 1024;
@@ -5625,8 +5625,8 @@ static void CalculateUrgentBurstFactor(
        double DETBufferSizeInTimeLumaPre;
        double DETBufferSizeInTimeChroma;
        double DETBufferSizeInTimeChromaPre;
-       double DETBufferSizeY;
-       double DETBufferSizeC;
+       unsigned int DETBufferSizeY;
+       unsigned int DETBufferSizeC;
 
        *NotEnoughUrgentLatencyHiding = 0;
        *NotEnoughUrgentLatencyHidingPre = 0;
@@ -5663,7 +5663,7 @@ static void CalculateUrgentBurstFactor(
                        &DETBufferSizeY,
                        &DETBufferSizeC);
 
-       LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / SwathWidthY;
+       LinesInDETLuma = (double)DETBufferSizeY / BytePerPixelInDETY / SwathWidthY;
        DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
        if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
                *NotEnoughUrgentLatencyHiding = 1;
@@ -5687,7 +5687,7 @@ static void CalculateUrgentBurstFactor(
        }
 
        if (BytePerPixelInDETC > 0) {
-               LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2);
+               LinesInDETChroma = (double)DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2);
                DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime
                                / (VRatio / 2);
                if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
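
With DETBufferSizeY/C switched to unsigned integers, the divisions feeding LinesInDET* gain explicit (double) casts. In the hunks above the per-pixel factors are already doubles, so the casts largely make the intent explicit, but they also rule out silent truncation anywhere a divisor happens to be integral; the two-line illustration below shows the failure mode being guarded against, with arbitrary values.

#include <stdio.h>

int main(void)
{
	unsigned int det_bytes = 167936;	/* e.g. a luma DET allocation */
	unsigned int swath_width = 3840;

	double truncated = det_bytes / swath_width / 4;		/* integer division, fraction lost */
	double exact = (double)det_bytes / swath_width / 4;	/* promoted to double up front */

	printf("truncated=%.3f exact=%.3f\n", truncated, exact);
	return 0;
}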
index cb3f70a71b51258e159f3f939a17606d67328219..ec56210b618091f609c2e988133e2b3efe077710 100644 (file)
@@ -299,7 +299,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int MaxLineBufferLines,
                unsigned int LineBufferSize,
                unsigned int DPPOutputBufferPixels,
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int WritebackInterfaceBufferSize,
                double DCFCLK,
                double ReturnBW,
@@ -318,8 +318,8 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int DPPPerPlane[],
                bool DCCEnable[],
                double DPPCLK[],
-               double DETBufferSizeY[],
-               double DETBufferSizeC[],
+               unsigned int DETBufferSizeY[],
+               unsigned int DETBufferSizeC[],
                unsigned int SwathHeightY[],
                unsigned int SwathHeightC[],
                unsigned int LBBitPerPixel[],
@@ -570,7 +570,7 @@ static void CalculateStutterEfficiency(
                double SRExitTime,
                bool SynchronizedVBlank,
                int DPPPerPlane[],
-               double DETBufferSizeY[],
+               unsigned int DETBufferSizeY[],
                int BytePerPixelY[],
                double BytePerPixelDETY[],
                double SwathWidthY[],
@@ -603,7 +603,7 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               long DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -635,8 +635,8 @@ static void CalculateSwathAndDETConfiguration(
                double SwathWidthChroma[],
                int SwathHeightY[],
                int SwathHeightC[],
-               double DETBufferSizeY[],
-               double DETBufferSizeC[],
+               unsigned int DETBufferSizeY[],
+               unsigned int DETBufferSizeC[],
                bool ViewportSizeSupportPerPlane[],
                bool *ViewportSizeSupport);
 static void CalculateSwathWidth(
@@ -2613,7 +2613,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        CalculateUrgentBurstFactor(
                                        v->swath_width_luma_ub[k],
                                        v->swath_width_chroma_ub[k],
-                                       v->DETBufferSizeInKByte,
+                                       v->DETBufferSizeInKByte[0],
                                        v->SwathHeightY[k],
                                        v->SwathHeightC[k],
                                        v->HTotal[k] / v->PixelClock[k],
@@ -2635,7 +2635,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        CalculateUrgentBurstFactor(
                                        v->swath_width_luma_ub[k],
                                        v->swath_width_chroma_ub[k],
-                                       v->DETBufferSizeInKByte,
+                                       v->DETBufferSizeInKByte[0],
                                        v->SwathHeightY[k],
                                        v->SwathHeightC[k],
                                        v->HTotal[k] / v->PixelClock[k],
@@ -2808,7 +2808,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                        v->MaxLineBufferLines,
                        v->LineBufferSize,
                        v->DPPOutputBufferPixels,
-                       v->DETBufferSizeInKByte,
+                       v->DETBufferSizeInKByte[0],
                        v->WritebackInterfaceBufferSize,
                        v->DCFCLK,
                        v->ReturnBW,
@@ -3027,7 +3027,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                                v->SurfaceWidthC[k],
                                v->SurfaceHeightY[k],
                                v->SurfaceHeightC[k],
-                               v->DETBufferSizeInKByte * 1024,
+                               v->DETBufferSizeInKByte[0] * 1024,
                                v->BlockHeight256BytesY[k],
                                v->BlockHeight256BytesC[k],
                                v->SurfaceTiling[k],
@@ -3177,7 +3177,7 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
        CalculateSwathAndDETConfiguration(
                        false,
                        mode_lib->vba.NumberOfActivePlanes,
-                       mode_lib->vba.DETBufferSizeInKByte,
+                       mode_lib->vba.DETBufferSizeInKByte[0],
                        dummy1,
                        dummy2,
                        mode_lib->vba.SourceScan,
@@ -3911,7 +3911,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
        CalculateSwathAndDETConfiguration(
                        true,
                        v->NumberOfActivePlanes,
-                       v->DETBufferSizeInKByte,
+                       v->DETBufferSizeInKByte[0],
                        v->MaximumSwathWidthLuma,
                        v->MaximumSwathWidthChroma,
                        v->SourceScan,
@@ -4399,7 +4399,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                        CalculateSwathAndDETConfiguration(
                                        false,
                                        v->NumberOfActivePlanes,
-                                       v->DETBufferSizeInKByte,
+                                       v->DETBufferSizeInKByte[0],
                                        v->MaximumSwathWidthLuma,
                                        v->MaximumSwathWidthChroma,
                                        v->SourceScan,
@@ -4622,7 +4622,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                CalculateUrgentBurstFactor(
                                                v->swath_width_luma_ub_this_state[k],
                                                v->swath_width_chroma_ub_this_state[k],
-                                               v->DETBufferSizeInKByte,
+                                               v->DETBufferSizeInKByte[0],
                                                v->SwathHeightYThisState[k],
                                                v->SwathHeightCThisState[k],
                                                v->HTotal[k] / v->PixelClock[k],
@@ -5025,7 +5025,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        CalculateUrgentBurstFactor(
                                                        v->swath_width_luma_ub_this_state[k],
                                                        v->swath_width_chroma_ub_this_state[k],
-                                                       v->DETBufferSizeInKByte,
+                                                       v->DETBufferSizeInKByte[0],
                                                        v->SwathHeightYThisState[k],
                                                        v->SwathHeightCThisState[k],
                                                        v->HTotal[k] / v->PixelClock[k],
@@ -5197,7 +5197,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                        v->MaxLineBufferLines,
                                        v->LineBufferSize,
                                        v->DPPOutputBufferPixels,
-                                       v->DETBufferSizeInKByte,
+                                       v->DETBufferSizeInKByte[0],
                                        v->WritebackInterfaceBufferSize,
                                        v->DCFCLKState[i][j],
                                        v->ReturnBWPerState[i][j],
@@ -5369,7 +5369,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int MaxLineBufferLines,
                unsigned int LineBufferSize,
                unsigned int DPPOutputBufferPixels,
-               double DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                unsigned int WritebackInterfaceBufferSize,
                double DCFCLK,
                double ReturnBW,
@@ -5388,8 +5388,8 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport(
                unsigned int DPPPerPlane[],
                bool DCCEnable[],
                double DPPCLK[],
-               double DETBufferSizeY[],
-               double DETBufferSizeC[],
+               unsigned int DETBufferSizeY[],
+               unsigned int DETBufferSizeC[],
                unsigned int SwathHeightY[],
                unsigned int SwathHeightC[],
                unsigned int LBBitPerPixel[],
@@ -6126,7 +6126,7 @@ static void CalculateStutterEfficiency(
                double SRExitTime,
                bool SynchronizedVBlank,
                int DPPPerPlane[],
-               double DETBufferSizeY[],
+               unsigned int DETBufferSizeY[],
                int BytePerPixelY[],
                double BytePerPixelDETY[],
                double SwathWidthY[],
@@ -6273,7 +6273,7 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               long DETBufferSizeInKByte,
+               unsigned int DETBufferSizeInKByte,
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -6305,8 +6305,8 @@ static void CalculateSwathAndDETConfiguration(
                double SwathWidthChroma[],
                int SwathHeightY[],
                int SwathHeightC[],
-               double DETBufferSizeY[],
-               double DETBufferSizeC[],
+               unsigned int DETBufferSizeY[],
+               unsigned int DETBufferSizeC[],
                bool ViewportSizeSupportPerPlane[],
                bool *ViewportSizeSupport)
 {
index 2a967458065be006ca327f9e1f4a8264453480fa..d764d784e2790682e4a68f1bb31acbefac0e718f 100644 (file)
@@ -299,7 +299,7 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib)
        mode_lib->vba.MaxDCHUBToPSCLThroughput = ip->max_dchub_pscl_bw_pix_per_clk;
        mode_lib->vba.MaxPSCLToLBThroughput = ip->max_pscl_lb_bw_pix_per_clk;
        mode_lib->vba.ROBBufferSizeInKByte = ip->rob_buffer_size_kbytes;
-       mode_lib->vba.DETBufferSizeInKByte = ip->det_buffer_size_kbytes;
+       mode_lib->vba.DETBufferSizeInKByte[0] = ip->det_buffer_size_kbytes;
 
        mode_lib->vba.PixelChunkSizeInKByte = ip->pixel_chunk_size_kbytes;
        mode_lib->vba.MetaChunkSize = ip->meta_chunk_size_kbytes;
index 025aa5bd8ea0a0191d37343b06a69a4e93257861..86db86b7153ea8f7b2943c379ab0fddd3ccb42a2 100644 (file)
@@ -233,7 +233,7 @@ struct vba_vars_st {
        // IP Parameters
        //
        unsigned int ROBBufferSizeInKByte;
-       double DETBufferSizeInKByte;
+       unsigned int DETBufferSizeInKByte[DC__NUM_DPP__MAX];
        double DETBufferSizeInTime;
        unsigned int DPPOutputBufferPixels;
        unsigned int OPPOutputBufferLines;
@@ -351,8 +351,8 @@ struct vba_vars_st {
 
        // Intermediates/Informational
        bool ImmediateFlipSupport;
-       double DETBufferSizeY[DC__NUM_DPP__MAX];
-       double DETBufferSizeC[DC__NUM_DPP__MAX];
+       unsigned int DETBufferSizeY[DC__NUM_DPP__MAX];
+       unsigned int DETBufferSizeC[DC__NUM_DPP__MAX];
        unsigned int SwathHeightY[DC__NUM_DPP__MAX];
        unsigned int SwathHeightC[DC__NUM_DPP__MAX];
        unsigned int LBBitPerPixel[DC__NUM_DPP__MAX];
@@ -631,8 +631,8 @@ struct vba_vars_st {
        enum odm_combine_mode odm_combine_dummy[DC__NUM_DPP__MAX];
        double         dummy1[DC__NUM_DPP__MAX];
        double         dummy2[DC__NUM_DPP__MAX];
-       double         dummy3[DC__NUM_DPP__MAX];
-       double         dummy4[DC__NUM_DPP__MAX];
+       unsigned int   dummy3[DC__NUM_DPP__MAX];
+       unsigned int   dummy4[DC__NUM_DPP__MAX];
        double         dummy5;
        double         dummy6;
        double         dummy7[DC__NUM_DPP__MAX];
@@ -872,8 +872,8 @@ struct vba_vars_st {
        int PercentMarginOverMinimumRequiredDCFCLK;
        bool DynamicMetadataSupported[DC__VOLTAGE_STATES][2];
        enum immediate_flip_requirement ImmediateFlipRequirement;
-       double DETBufferSizeYThisState[DC__NUM_DPP__MAX];
-       double DETBufferSizeCThisState[DC__NUM_DPP__MAX];
+       unsigned int DETBufferSizeYThisState[DC__NUM_DPP__MAX];
+       unsigned int DETBufferSizeCThisState[DC__NUM_DPP__MAX];
        bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX];
        bool NoUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
        int swath_width_luma_ub_this_state[DC__NUM_DPP__MAX];
index be57088d185dcd46f7ad5f6f26e922d97a3fedfa..f403d8e84a8c1e8d6e16e8e3ddef1773d3440881 100644 (file)
@@ -37,6 +37,8 @@ static uint32_t dsc_policy_max_target_bpp_limit = 16;
 /* default DSC policy enables DSC only when needed */
 static bool dsc_policy_enable_dsc_when_not_needed;
 
+static bool dsc_policy_disable_dsc_stream_overhead;
+
 static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size)
 {
 
@@ -250,6 +252,7 @@ static bool intersect_dsc_caps(
        if (pixel_encoding == PIXEL_ENCODING_YCBCR422 || pixel_encoding == PIXEL_ENCODING_YCBCR420)
                dsc_common_caps->bpp_increment_div = min(dsc_common_caps->bpp_increment_div, (uint32_t)8);
 
+       dsc_common_caps->is_dp = dsc_sink_caps->is_dp;
        return true;
 }
 
@@ -258,12 +261,63 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
        return (value + 9) / 10;
 }
 
+static struct fixed31_32 compute_dsc_max_bandwidth_overhead(
+               const struct dc_crtc_timing *timing,
+               const int num_slices_h,
+               const bool is_dp)
+{
+       struct fixed31_32 max_dsc_overhead;
+       struct fixed31_32 refresh_rate;
+
+       if (dsc_policy_disable_dsc_stream_overhead || !is_dp)
+               return dc_fixpt_from_int(0);
+
+       /* derive the stream refresh rate from the pixel clock and h/v totals */
+       refresh_rate = dc_fixpt_from_int(timing->pix_clk_100hz);
+       refresh_rate = dc_fixpt_div_int(refresh_rate, timing->h_total);
+       refresh_rate = dc_fixpt_div_int(refresh_rate, timing->v_total);
+       refresh_rate = dc_fixpt_mul_int(refresh_rate, 100);
+
+       max_dsc_overhead = dc_fixpt_from_int(num_slices_h);
+       max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, timing->v_total);
+       max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, 256);
+       max_dsc_overhead = dc_fixpt_div_int(max_dsc_overhead, 1000);
+       max_dsc_overhead = dc_fixpt_mul(max_dsc_overhead, refresh_rate);
+
+       return max_dsc_overhead;
+}
+
+static uint32_t compute_bpp_x16_from_target_bandwidth(
+               const uint32_t bandwidth_in_kbps,
+               const struct dc_crtc_timing *timing,
+               const uint32_t num_slices_h,
+               const uint32_t bpp_increment_div,
+               const bool is_dp)
+{
+       struct fixed31_32 overhead_in_kbps;
+       struct fixed31_32 effective_bandwidth_in_kbps;
+       struct fixed31_32 bpp_x16;
+
+       overhead_in_kbps = compute_dsc_max_bandwidth_overhead(
+                               timing, num_slices_h, is_dp);
+       effective_bandwidth_in_kbps = dc_fixpt_from_int(bandwidth_in_kbps);
+       effective_bandwidth_in_kbps = dc_fixpt_sub(effective_bandwidth_in_kbps,
+                       overhead_in_kbps);
+       bpp_x16 = dc_fixpt_mul_int(effective_bandwidth_in_kbps, 10);
+       bpp_x16 = dc_fixpt_div_int(bpp_x16, timing->pix_clk_100hz);
+       bpp_x16 = dc_fixpt_from_int(dc_fixpt_floor(dc_fixpt_mul_int(bpp_x16, bpp_increment_div)));
+       bpp_x16 = dc_fixpt_div_int(bpp_x16, bpp_increment_div);
+       bpp_x16 = dc_fixpt_mul_int(bpp_x16, 16);
+       return dc_fixpt_floor(bpp_x16);
+}
+
 /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
  * and uncompressed bandwidth.
  */
 static void get_dsc_bandwidth_range(
                const uint32_t min_bpp_x16,
                const uint32_t max_bpp_x16,
+               const uint32_t num_slices_h,
                const struct dsc_enc_caps *dsc_caps,
                const struct dc_crtc_timing *timing,
                struct dc_dsc_bw_range *range)
@@ -272,16 +326,21 @@ static void get_dsc_bandwidth_range(
        range->stream_kbps = dc_bandwidth_in_kbps_from_timing(timing);
 
        /* max dsc target bpp */
-       range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, max_bpp_x16);
+       range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing,
+                       max_bpp_x16, num_slices_h, dsc_caps->is_dp);
        range->max_target_bpp_x16 = max_bpp_x16;
        if (range->max_kbps > range->stream_kbps) {
                /* max dsc target bpp is capped to native bandwidth */
                range->max_kbps = range->stream_kbps;
-               range->max_target_bpp_x16 = calc_dsc_bpp_x16(range->stream_kbps, timing->pix_clk_100hz, dsc_caps->bpp_increment_div);
+               range->max_target_bpp_x16 = compute_bpp_x16_from_target_bandwidth(
+                               range->max_kbps, timing, num_slices_h,
+                               dsc_caps->bpp_increment_div,
+                               dsc_caps->is_dp);
        }
 
        /* min dsc target bpp */
-       range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, min_bpp_x16);
+       range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing,
+                       min_bpp_x16, num_slices_h, dsc_caps->is_dp);
        range->min_target_bpp_x16 = min_bpp_x16;
        if (range->min_kbps > range->max_kbps) {
                /* min dsc target bpp is capped to max dsc bandwidth*/
@@ -290,7 +349,6 @@ static void get_dsc_bandwidth_range(
        }
 }
 
-
 /* Decides if DSC should be used and calculates target bpp if it should, applying DSC policy.
  *
  * Returns:
@@ -303,6 +361,7 @@ static bool decide_dsc_target_bpp_x16(
                const struct dsc_enc_caps *dsc_common_caps,
                const int target_bandwidth_kbps,
                const struct dc_crtc_timing *timing,
+               const int num_slices_h,
                int *target_bpp_x16)
 {
        bool should_use_dsc = false;
@@ -311,7 +370,7 @@ static bool decide_dsc_target_bpp_x16(
        memset(&range, 0, sizeof(range));
 
        get_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16,
-                       dsc_common_caps, timing, &range);
+                       num_slices_h, dsc_common_caps, timing, &range);
        if (!policy->enable_dsc_when_not_needed && target_bandwidth_kbps >= range.stream_kbps) {
                /* enough bandwidth without dsc */
                *target_bpp_x16 = 0;
@@ -327,7 +386,10 @@ static bool decide_dsc_target_bpp_x16(
                should_use_dsc = true;
        } else if (target_bandwidth_kbps >= range.min_kbps) {
                /* use target bpp that can take entire target bandwidth */
-               *target_bpp_x16 = calc_dsc_bpp_x16(target_bandwidth_kbps, timing->pix_clk_100hz, dsc_common_caps->bpp_increment_div);
+               *target_bpp_x16 = compute_bpp_x16_from_target_bandwidth(
+                               target_bandwidth_kbps, timing, num_slices_h,
+                               dsc_common_caps->bpp_increment_div,
+                               dsc_common_caps->is_dp);
                should_use_dsc = true;
        } else {
                /* not enough bandwidth to fulfill minimum requirement */
@@ -531,18 +593,6 @@ static bool setup_dsc_config(
        if (!is_dsc_possible)
                goto done;
 
-       if (target_bandwidth_kbps > 0) {
-               is_dsc_possible = decide_dsc_target_bpp_x16(
-                               &policy,
-                               &dsc_common_caps,
-                               target_bandwidth_kbps,
-                               timing,
-                               &target_bpp);
-               dsc_cfg->bits_per_pixel = target_bpp;
-       }
-       if (!is_dsc_possible)
-               goto done;
-
        sink_per_slice_throughput_mps = 0;
 
        // Validate available DSC settings against the mode timing
@@ -690,12 +740,26 @@ static bool setup_dsc_config(
 
        dsc_cfg->num_slices_v = pic_height/slice_height;
 
+       if (target_bandwidth_kbps > 0) {
+               is_dsc_possible = decide_dsc_target_bpp_x16(
+                               &policy,
+                               &dsc_common_caps,
+                               target_bandwidth_kbps,
+                               timing,
+                               num_slices_h,
+                               &target_bpp);
+               dsc_cfg->bits_per_pixel = target_bpp;
+       }
+       if (!is_dsc_possible)
+               goto done;
+
        // Final decision: can we do DSC or not?
        if (is_dsc_possible) {
                // Fill out the rest of DSC settings
                dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported;
                dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth;
                dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4;
+               dsc_cfg->is_dp = dsc_sink_caps->is_dp;
        }
 
 done:
@@ -806,6 +870,7 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, const uint8_t *dpcd_dsc_basic_da
        dsc_sink_caps->branch_max_line_width = dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_MAX_LINE_WIDTH - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0] * 320;
        ASSERT(dsc_sink_caps->branch_max_line_width == 0 || dsc_sink_caps->branch_max_line_width >= 5120);
 
+       dsc_sink_caps->is_dp = true;
        return true;
 }
 
@@ -838,7 +903,8 @@ bool dc_dsc_compute_bandwidth_range(
                                dsc_min_slice_height_override, max_bpp_x16, &config);
 
        if (is_dsc_possible)
-               get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, &dsc_common_caps, timing, range);
+               get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16,
+                               config.num_slices_h, &dsc_common_caps, timing, range);
 
        return is_dsc_possible;
 }
@@ -864,13 +930,20 @@ bool dc_dsc_compute_config(
        return is_dsc_possible;
 }
 
-uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16)
+uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing,
+               uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp)
 {
-       struct fixed31_32 link_bw_kbps;
-       link_bw_kbps = dc_fixpt_from_int(pix_clk_100hz);
-       link_bw_kbps = dc_fixpt_div_int(link_bw_kbps, 160);
-       link_bw_kbps = dc_fixpt_mul_int(link_bw_kbps, bpp_x16);
-       return dc_fixpt_ceil(link_bw_kbps);
+       struct fixed31_32 overhead_in_kbps;
+       struct fixed31_32 bpp;
+       struct fixed31_32 actual_bandwidth_in_kbps;
+
+       overhead_in_kbps = compute_dsc_max_bandwidth_overhead(
+                       timing, num_slices_h, is_dp);
+       bpp = dc_fixpt_from_fraction(bpp_x16, 16);
+       actual_bandwidth_in_kbps = dc_fixpt_from_fraction(timing->pix_clk_100hz, 10);
+       actual_bandwidth_in_kbps = dc_fixpt_mul(actual_bandwidth_in_kbps, bpp);
+       actual_bandwidth_in_kbps = dc_fixpt_add(actual_bandwidth_in_kbps, overhead_in_kbps);
+       return dc_fixpt_ceil(actual_bandwidth_in_kbps);
 }
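A quick worked example of the unit handling in dc_dsc_stream_bandwidth_in_kbps() above, using assumed numbers rather than anything from this patch: with pix_clk_100hz = 5,940,000 (a 594 MHz pixel clock) and bpp_x16 = 128 (8 bpp), the base term is 5,940,000 / 10 * 8 = 4,752,000 kbps; the DP slice overhead returned by compute_dsc_max_bandwidth_overhead() is then added on top and the sum is rounded up with dc_fixpt_ceil().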
 
 void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, struct dc_dsc_policy *policy)
@@ -954,3 +1027,8 @@ void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable)
 {
        dsc_policy_enable_dsc_when_not_needed = enable;
 }
+
+void dc_dsc_policy_set_disable_dsc_stream_overhead(bool disable)
+{
+       dsc_policy_disable_dsc_stream_overhead = disable;
+}
index c6a1cd80aeae4fa71ee7fc7fff84fcbfb8282c17..7b294f637881ab0ccb5a41817fce9a798ed93f22 100644 (file)
@@ -284,26 +284,6 @@ static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp,
        return bytes_per_pixel;
 }
 
-static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
-                               u32 bpp_increment_div)
-{
-       u32 dsc_target_bpp_x16;
-       float f_dsc_target_bpp;
-       float f_stream_bandwidth_100bps;
-       // bpp_increment_div is actually precision
-       u32 precision = bpp_increment_div;
-
-       f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
-       f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
-
-       // Round down to the nearest precision stop to bring it into DSC spec
-       // range
-       dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision);
-       dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
-
-       return dsc_target_bpp_x16;
-}
-
 /**
  * calc_rc_params - reads the user's cmdline mode
  * @rc: DC internal DSC parameters
@@ -367,26 +347,3 @@ u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
        DC_FP_END();
        return ret;
 }
-
-/**
- * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
- * @stream_bandwidth_kbps:
- * @pix_clk_100hz:
- * @bpp_increment_div:
- *
- * Calculate the total of bits per pixel for DSC configuration.
- *
- * @note This calculation requires float point operation, most of it executes
- * under kernel_fpu_{begin,end}.
- */
-u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
-                    u32 bpp_increment_div)
-{
-       u32 dsc_bpp;
-
-       DC_FP_START();
-       dsc_bpp =  _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
-                                       bpp_increment_div);
-       DC_FP_END();
-       return dsc_bpp;
-}
index 8123827840c58b7fcd61cf58c3767316125599ef..262f06afcbf95e5389c11ea19694521f66ada9d4 100644 (file)
@@ -79,8 +79,6 @@ typedef struct qp_entry qp_table[];
 
 void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
 u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
-u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
-                    u32 bpp_increment_div);
 
 #endif
 
index 3ae05c96d55723c4bac6f60529d2564e1334d5d1..428842511c039bea9443d0adb9ffc8f17afaf23c 100644 (file)
@@ -65,7 +65,8 @@ bool perform_link_training_with_retries(
        bool skip_video_pattern,
        int attempts,
        struct pipe_ctx *pipe_ctx,
-       enum signal_type signal);
+       enum signal_type signal,
+       bool do_fallback);
 
 bool is_mst_supported(struct dc_link *link);
 
@@ -75,6 +76,8 @@ void detect_edp_sink_caps(struct dc_link *link);
 
 bool is_dp_active_dongle(const struct dc_link *link);
 
+bool is_dp_branch_device(const struct dc_link *link);
+
 bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing);
 
 void dp_enable_mst_on_sink(struct dc_link *link, bool enable);
@@ -94,5 +97,12 @@ void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable);
 bool dp_update_dsc_config(struct pipe_ctx *pipe_ctx);
 bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable);
 
+/* Convert the PHY repeater count read from DPCD into an integer count. */
+uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count);
+
+/* Check DPCD training status registers to detect link loss. */
+enum link_training_result dp_check_link_loss_status(
+               struct dc_link *link,
+               const struct link_training_settings *link_training_setting);
 
 #endif /* __DC_LINK_DP_H__ */
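As a rough sketch of the decoding dp_convert_to_count() is expected to perform (the one-hot PHY_REPEATER_CNT encoding below comes from the DP standard and is an assumption here, not taken from this patch):

#include <stdint.h>

/* Sketch: DPCD PHY_REPEATER_CNT is one-hot encoded, 0x80 = 1 LTTPR, 0x40 = 2, ... 0x01 = 8. */
static uint8_t example_lttpr_count_from_dpcd(uint8_t phy_repeater_cnt)
{
	switch (phy_repeater_cnt) {
	case 0x80: return 1;
	case 0x40: return 2;
	case 0x20: return 3;
	case 0x10: return 4;
	case 0x08: return 5;
	case 0x04: return 6;
	case 0x02: return 7;
	case 0x01: return 8;
	default:   return 0; /* no repeaters or an invalid value */
	}
}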
index f520e13aee4c248846e8f741c833b902edfd8918..f94135c6e3c222b1ccb54f58825949581e548ce7 100644 (file)
@@ -88,6 +88,7 @@ struct dsc_enc_caps {
        int32_t max_total_throughput_mps; /* Maximum total throughput with all the slices combined */
        int32_t max_slice_width;
        uint32_t bpp_increment_div; /* bpp increment divisor, e.g. if 16, it's 1/16th of a bit */
+       bool is_dp;
 };
 
 struct dsc_funcs {
index 7d36e55f30979d2c0f9148ffd755fad1e7567e62..883dd8733ea4e519c3f09bfb2bd03490cc0ac606 100644 (file)
@@ -81,6 +81,11 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
 /* Return DIG link encoder used by link. NULL if unused. */
 struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
                struct dc_state *state,
-               struct dc_link *link);
+               const struct dc_link *link);
+
+/* Return next available DIG link encoder. NULL if none available. */
+struct link_encoder *link_enc_cfg_get_next_avail_link_enc(
+       const struct dc *dc,
+       const struct dc_state *state);
 
 #endif /* DC_INC_LINK_ENC_CFG_H_ */
index 1a5be2792055e16b8f9220c94b3946c2b87d5c2a..ed54e1c819beda7b13e371249f21adf65205133b 100644 (file)
@@ -58,8 +58,8 @@ enum dc_irq_source to_dal_irq_source_dcn21(
                return DC_IRQ_SOURCE_VBLANK5;
        case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP:
                return DC_IRQ_SOURCE_VBLANK6;
-       case DCN_1_0__SRCID__DMCUB_OUTBOX_HIGH_PRIORITY_READY_INT:
-               return DC_IRQ_SOURCE_DMCUB_OUTBOX0;
+       case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT:
+               return DC_IRQ_SOURCE_DMCUB_OUTBOX;
        case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
                return DC_IRQ_SOURCE_DC1_VLINE0;
        case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
@@ -187,7 +187,7 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
        .ack = NULL
 };
 
-static const struct irq_source_info_funcs dmub_trace_irq_info_funcs = {
+static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
        .set = NULL,
        .ack = NULL
 };
@@ -301,11 +301,11 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
                .funcs = &vline0_irq_info_funcs\
        }
 
-#define dmub_trace_int_entry()\
-       [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
-               IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\
-                       DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
-               .funcs = &dmub_trace_irq_info_funcs\
+#define dmub_outbox_int_entry()\
+       [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\
+               IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\
+                       DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\
+               .funcs = &dmub_outbox_irq_info_funcs\
        }
 
 #define dummy_irq_entry() \
@@ -426,7 +426,7 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = {
        vline0_int_entry(3),
        vline0_int_entry(4),
        vline0_int_entry(5),
-       dmub_trace_int_entry(),
+       dmub_outbox_int_entry(),
 };
 
 static const struct irq_service_funcs irq_service_funcs_dcn21 = {
index ae8f47ec0f8c338b5b41787dd95dfc4a525c5e3d..5f9346622301efccb35b6085b4b411d3c101567c 100644 (file)
@@ -150,7 +150,7 @@ enum dc_irq_source {
        DC_IRQ_SOURCE_DC4_VLINE1,
        DC_IRQ_SOURCE_DC5_VLINE1,
        DC_IRQ_SOURCE_DC6_VLINE1,
-       DC_IRQ_DMCUB_OUTBOX1,
+       DC_IRQ_SOURCE_DMCUB_OUTBOX,
        DC_IRQ_SOURCE_DMCUB_OUTBOX0,
 
        DAL_IRQ_SOURCES_NUMBER
index b4e14960b164dd896c6ceca34e239efba0fb2e22..3ef6b536cceafb0e9d95b38d37379a1bbcd76d45 100644 (file)
@@ -216,6 +216,23 @@ struct dmub_srv_fb_info {
        struct dmub_fb fb[DMUB_WINDOW_TOTAL];
 };
 
+/*
+ * struct dmub_srv_hw_params - params for dmub hardware initialization
+ * @fb: framebuffer info for each region
+ * @fb_base: base of the framebuffer aperture
+ * @fb_offset: offset of the framebuffer aperture
+ * @psp_version: psp version to pass for DMCU init
+ * @load_inst_const: true if DMUB should load inst const fw
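+ * @skip_panel_power_sequence: true if DMUB should skip the panel power sequence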
+ */
+struct dmub_srv_hw_params {
+       struct dmub_fb *fb[DMUB_WINDOW_TOTAL];
+       uint64_t fb_base;
+       uint64_t fb_offset;
+       uint32_t psp_version;
+       bool load_inst_const;
+       bool skip_panel_power_sequence;
+};
+
 /**
  * struct dmub_srv_base_funcs - Driver specific base callbacks
  */
@@ -290,7 +307,8 @@ struct dmub_srv_hw_funcs {
        bool (*is_hw_init)(struct dmub_srv *dmub);
 
        bool (*is_phy_init)(struct dmub_srv *dmub);
-       void (*enable_dmub_boot_options)(struct dmub_srv *dmub);
+       void (*enable_dmub_boot_options)(struct dmub_srv *dmub,
+                               const struct dmub_srv_hw_params *params);
 
        void (*skip_dmub_panel_power_sequence)(struct dmub_srv *dmub, bool skip);
 
@@ -325,23 +343,6 @@ struct dmub_srv_create_params {
        bool is_virtual;
 };
 
-/*
- * struct dmub_srv_hw_params - params for dmub hardware initialization
- * @fb: framebuffer info for each region
- * @fb_base: base of the framebuffer aperture
- * @fb_offset: offset of the framebuffer aperture
- * @psp_version: psp version to pass for DMCU init
- * @load_inst_const: true if DMUB should load inst const fw
- */
-struct dmub_srv_hw_params {
-       struct dmub_fb *fb[DMUB_WINDOW_TOTAL];
-       uint64_t fb_base;
-       uint64_t fb_offset;
-       uint32_t psp_version;
-       bool load_inst_const;
-       bool skip_panel_power_sequence;
-};
-
 /**
  * struct dmub_srv - software state for dmcub
  * @asic: dmub asic identifier
index 4195ff10c5148a202baf8d7da49eb9432d8681bf..40ce15eb934c15fc9eeaaf8378f7a86479bfa04f 100644 (file)
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x23db9b126
+#define DMUB_FW_VERSION_GIT_HASH 0x2cab49dfb
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 62
+#define DMUB_FW_VERSION_REVISION 65
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
 /* Maximum number of planes on any ASIC. */
 #define DMUB_MAX_PLANES 6
 
+#define DMUB_MAX_SUBVP_STREAMS 2
+
 /* Trace buffer offset for entry */
 #define TRACE_BUFFER_ENTRY_OFFSET  16
 
 /**
- * ABM backlight control version legacy
+ * ABM control version legacy
  */
-#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_UNKNOWN 0x0
+#define DMUB_CMD_ABM_CONTROL_VERSION_UNKNOWN 0x0
 
 /**
- * ABM backlight control version with multi edp support
+ * ABM control version with multi edp support
  */
-#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1 0x1
+#define DMUB_CMD_ABM_CONTROL_VERSION_1 0x1
 
 /**
  * Physical framebuffer address location, 64-bit.
@@ -162,6 +164,13 @@ extern "C" {
 #define dmub_udelay(microseconds) udelay(microseconds)
 #endif
 
+/**
+ * Number of nanoseconds per DMUB tick.
+ * DMCUB_TIMER_CURRENT increments in DMUB ticks, which are 10ns by default.
+ * If DMCUB_TIMER_WINDOW is non-zero this will no longer be true.
+ */
+#define NS_PER_DMUB_TICK 10
+
 /**
  * union dmub_addr - DMUB physical/virtual 64-bit address.
  */
@@ -328,7 +337,8 @@ union dmub_fw_boot_options {
                uint32_t skip_phy_access : 1; /**< 1 if PHY access should be skipped */
                uint32_t disable_clk_gate: 1; /**< 1 if clock gating should be disabled */
                uint32_t skip_phy_init_panel_sequence: 1; /**< 1 to skip panel init seq */
-               uint32_t reserved : 26; /**< reserved */
+               uint32_t reserved_unreleased: 1; /**< reserved for an unreleased feature */
+               uint32_t reserved : 25; /**< reserved */
        } bits; /**< boot bits */
        uint32_t all; /**< 32-bit access to bits */
 };
@@ -452,6 +462,61 @@ enum dmub_gpint_command {
        DMUB_GPINT__PSR_RESIDENCY = 9,
 };
 
+/**
+ * INBOX0 generic command definition
+ */
+union dmub_inbox0_cmd_common {
+       struct {
+               uint32_t command_code: 8; /**< INBOX0 command code */
+               uint32_t param: 24; /**< 24-bit parameter */
+       } bits;
+       uint32_t all;
+};
+
+/**
+ * INBOX0 hw_lock command definition
+ */
+union dmub_inbox0_cmd_lock_hw {
+       struct {
+               uint32_t command_code: 8;
+
+               /* NOTE: Must have enough bits to match: enum hw_lock_client */
+               uint32_t hw_lock_client: 1;
+
+               /* NOTE: Below fields must match with: struct dmub_hw_lock_inst_flags */
+               uint32_t otg_inst: 3;
+               uint32_t opp_inst: 3;
+               uint32_t dig_inst: 3;
+
+               /* NOTE: Below fields must match with: union dmub_hw_lock_flags */
+               uint32_t lock_pipe: 1;
+               uint32_t lock_cursor: 1;
+               uint32_t lock_dig: 1;
+               uint32_t triple_buffer_lock: 1;
+
+               uint32_t lock: 1;                               /**< Lock */
+               uint32_t should_release: 1;             /**< Release */
+               uint32_t reserved: 8;                   /**< Reserved for extending more clients, HW, etc. */
+       } bits;
+       uint32_t all;
+};
+
+union dmub_inbox0_data_register {
+       union dmub_inbox0_cmd_common inbox0_cmd_common;
+       union dmub_inbox0_cmd_lock_hw inbox0_cmd_lock_hw;
+};
+
+enum dmub_inbox0_command {
+       /**
+        * DESC: Invalid command, ignored.
+        */
+       DMUB_INBOX0_CMD__INVALID_COMMAND = 0,
+       /**
+        * DESC: Notification to acquire/release HW lock
+        * ARGS:
+        */
+       DMUB_INBOX0_CMD__HW_LOCK = 1,
+};
 //==============================================================================
 //</DMUB_GPINT>=================================================================
 //==============================================================================
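A minimal sketch of how a client might pack the INBOX0 hw-lock word defined in the hunk above, assuming the union and enum definitions from dmub_cmd.h; the real driver/firmware flow may differ:

#include "dmub_cmd.h" /* assumed header providing the union and enums above */

static uint32_t example_build_inbox0_hw_lock(uint8_t otg_inst, bool acquire)
{
	union dmub_inbox0_cmd_lock_hw cmd = { .bits = {
		.command_code   = DMUB_INBOX0_CMD__HW_LOCK,
		.hw_lock_client = HW_LOCK_CLIENT_DRIVER, /* value 0, fits the 1-bit field */
		.otg_inst       = otg_inst,              /* caller must pass 0..7 (3-bit field) */
		.lock_pipe      = 1,
		.lock           = acquire ? 1 : 0,
		.should_release = acquire ? 0 : 1,
	} };

	return cmd.all; /* 32-bit value to write to the INBOX0 data register */
}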
@@ -570,7 +635,8 @@ struct dmub_cmd_header {
        unsigned int type : 8; /**< command type */
        unsigned int sub_type : 8; /**< command sub type */
        unsigned int ret_status : 1; /**< 1 if returned data, 0 otherwise */
-       unsigned int reserved0 : 7; /**< reserved bits */
+       unsigned int multi_cmd_pending : 1; /**< 1 if multiple commands chained together */
+       unsigned int reserved0 : 6; /**< reserved bits */
        unsigned int payload_bytes : 6;  /* payload excluding header - up to 60 bytes */
        unsigned int reserved1 : 2; /**< reserved bits */
 };
@@ -1343,6 +1409,9 @@ struct dmub_rb_cmd_psr_force_static {
 
 /**
  * Set of HW components that can be locked.
+ *
+ * Note: If updating with more HW components, fields
+ * in dmub_inbox0_cmd_lock_hw must be updated to match.
  */
 union dmub_hw_lock_flags {
        /**
@@ -1375,6 +1444,9 @@ union dmub_hw_lock_flags {
 
 /**
  * Instances of HW to be locked.
+ *
+ * Note: If updating with more HW components, fields
+ * in dmub_inbox0_cmd_lock_hw must be updated to match.
  */
 struct dmub_hw_lock_inst_flags {
        /**
@@ -1398,16 +1470,16 @@ struct dmub_hw_lock_inst_flags {
 
 /**
  * Clients that can acquire the HW Lock Manager.
+ *
+ * Note: If updating with more clients, fields in
+ * dmub_inbox0_cmd_lock_hw must be updated to match.
  */
 enum hw_lock_client {
        /**
         * Driver is the client of HW Lock Manager.
         */
        HW_LOCK_CLIENT_DRIVER = 0,
-       /**
-        * FW is the client of HW Lock Manager.
-        */
-       HW_LOCK_CLIENT_FW,
+       HW_LOCK_CLIENT_SUBVP = 3,
        /**
         * Invalid client.
         */
@@ -1637,7 +1709,7 @@ struct dmub_cmd_abm_set_backlight_data {
        uint32_t backlight_user_level;
 
        /**
-        * Backlight data version.
+        * ABM control version.
         */
        uint8_t version;
 
@@ -1677,6 +1749,23 @@ struct dmub_cmd_abm_set_level_data {
         * Set current ABM operating/aggression level.
         */
        uint32_t level;
+
+       /**
+        * ABM control version.
+        */
+       uint8_t version;
+
+       /**
+        * Panel Control HW instance mask.
+        * Bit 0 is Panel Control HW instance 0.
+        * Bit 1 is Panel Control HW instance 1.
+        */
+       uint8_t panel_mask;
+
+       /**
+        * Explicit padding to 4 byte boundary.
+        */
+       uint8_t pad[2];
 };
 
 /**
@@ -1702,6 +1791,23 @@ struct dmub_cmd_abm_set_ambient_level_data {
         * Ambient light sensor reading from OS.
         */
        uint32_t ambient_lux;
+
+       /**
+        * ABM control version.
+        */
+       uint8_t version;
+
+       /**
+        * Panel Control HW instance mask.
+        * Bit 0 is Panel Control HW instance 0.
+        * Bit 1 is Panel Control HW instance 1.
+        */
+       uint8_t panel_mask;
+
+       /**
+        * Explicit padding to 4 byte boundary.
+        */
+       uint8_t pad[2];
 };
 
 /**
@@ -1728,6 +1834,23 @@ struct dmub_cmd_abm_set_pwm_frac_data {
         * TODO: Convert to uint8_t.
         */
        uint32_t fractional_pwm;
+
+       /**
+        * ABM control version.
+        */
+       uint8_t version;
+
+       /**
+        * Panel Control HW instance mask.
+        * Bit 0 is Panel Control HW instance 0.
+        * Bit 1 is Panel Control HW instance 1.
+        */
+       uint8_t panel_mask;
+
+       /**
+        * Explicit padding to 4 byte boundary.
+        */
+       uint8_t pad[2];
 };
 
 /**
@@ -1758,6 +1881,24 @@ struct dmub_cmd_abm_init_config_data {
         * Indirect buffer length.
         */
        uint16_t bytes;
+
+
+       /**
+        * ABM control version.
+        */
+       uint8_t version;
+
+       /**
+        * Panel Control HW instance mask.
+        * Bit 0 is Panel Control HW instance 0.
+        * Bit 1 is Panel Control HW instance 1.
+        */
+       uint8_t panel_mask;
+
+       /**
+        * Explicit padding to 4 byte boundary.
+        */
+       uint8_t pad[2];
 };
 
 /**
@@ -2126,6 +2267,46 @@ static inline bool dmub_rb_front(struct dmub_rb *rb,
        return true;
 }
 
+/**
+ * @brief Determines the next ringbuffer offset.
+ *
+ * @param rb DMUB inbox ringbuffer
+ * @param num_cmds Number of commands
+ * @param next_rptr The next offset in the ringbuffer
+ */
+static inline void dmub_rb_get_rptr_with_offset(struct dmub_rb *rb,
+                                 uint32_t num_cmds,
+                                 uint32_t *next_rptr)
+{
+       *next_rptr = rb->rptr + DMUB_RB_CMD_SIZE * num_cmds;
+
+       if (*next_rptr >= rb->capacity)
+               *next_rptr %= rb->capacity;
+}
+
+/**
+ * @brief Returns a pointer to a command in the inbox.
+ *
+ * @param rb DMUB inbox ringbuffer
+ * @param cmd The inbox command to return
+ * @param rptr The ringbuffer offset
+ * @return true if not empty
+ * @return false otherwise
+ */
+static inline bool dmub_rb_peek_offset(struct dmub_rb *rb,
+                                union dmub_rb_cmd  **cmd,
+                                uint32_t rptr)
+{
+       uint8_t *rb_cmd = (uint8_t *)(rb->base_address) + rptr;
+
+       if (dmub_rb_empty(rb))
+               return false;
+
+       *cmd = (union dmub_rb_cmd *)rb_cmd;
+
+       return true;
+}
+
 /**
  * @brief Returns the next unprocessed command in the outbox.
  *
index 6934906c665ebc56a7751efb4f2d9ebe063a456e..b11f530f682c4c722f556f1559023559619bfcb0 100644 (file)
@@ -385,7 +385,7 @@ union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct dmub_srv *dmub)
        return status;
 }
 
-void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub)
+void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params)
 {
        union dmub_fw_boot_options boot_options = {0};
 
index de5351cd5abc409575a517632ce20c4b13defc32..42d610a260efc8d0c38bc3908c07de0b909d089d 100644 (file)
@@ -221,7 +221,7 @@ bool dmub_dcn20_is_gpint_acked(struct dmub_srv *dmub,
 
 uint32_t dmub_dcn20_get_gpint_response(struct dmub_srv *dmub);
 
-void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub);
+void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params);
 
 void dmub_dcn20_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip);
 
index 1cbb125b4063b1c5e2508dbf78f51ae45a6b5b35..ed9fa6138aa696e03e7df624eab1880ed44ba7a9 100644 (file)
@@ -514,6 +514,10 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
        outbox0_rb_params.capacity = tracebuff_fb->size - dmub_align(TRACE_BUFFER_ENTRY_OFFSET, 64);
        dmub_rb_init(&dmub->outbox0_rb, &outbox0_rb_params);
 
+       /* Report to DMUB what features are supported by current driver */
+       if (dmub->hw_funcs.enable_dmub_boot_options)
+               dmub->hw_funcs.enable_dmub_boot_options(dmub, params);
+
        if (dmub->hw_funcs.reset_release)
                dmub->hw_funcs.reset_release(dmub);
 
index 7392a89e771ff0d6778cb66ecace660ad871964a..73ada16658aa9e970fb205aafe882d1a2ae88aef 100644 (file)
@@ -68,6 +68,14 @@ enum link_training_result {
        LINK_TRAINING_LQA_FAIL,
        /* one of the CR,EQ or symbol lock is dropped */
        LINK_TRAINING_LINK_LOSS,
+       /* Abort link training (because sink unplugged) */
+       LINK_TRAINING_ABORT,
+};
+
+enum lttpr_mode {
+       LTTPR_MODE_NON_LTTPR,
+       LTTPR_MODE_TRANSPARENT,
+       LTTPR_MODE_NON_TRANSPARENT,
 };
 
 struct link_training_settings {
index 68a6481d7f8f3094862012f80acdc6dee1f7ed7f..b963226e8af433d855ef0f6296ee30814ad6c4fe 100644 (file)
@@ -260,7 +260,6 @@ enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp,
        struct mod_hdcp_output output;
        enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
 
-       memset(hdcp, 0, sizeof(struct mod_hdcp));
        memset(&output, 0, sizeof(output));
        hdcp->config = *config;
        HDCP_TOP_INTERFACE_TRACE(hdcp);
index 2cbd931363bdde756da39cc536ca3a26c2d7f5bb..43e6f8b17e79c75e4663d9832704c0483f9d56ef 100644 (file)
@@ -128,6 +128,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 
 static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
+       /* Do not attempt authentication when the device count is 0 */
+       if (0 == get_device_count(hdcp)) {
+               return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
+       }
+
        /* Some MST display may choose to report the internal panel as an HDCP RX.
         * To update this condition with 1(because the immediate repeater's internal
         * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp).
index c1331facdcb4934a316970fbc68eab8417402133..117c6b45f71891bcc5786dc0f4152e82ccc457bd 100644 (file)
@@ -207,6 +207,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
 
 static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
 {
+       /* Do not attempt authentication when the device count is 0 */
+       if (0 == get_device_count(hdcp)) {
+               return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
+       }
+
        /* Some MST display may choose to report the internal panel as an HDCP RX.   */
        /* To update this condition with 1(because the immediate repeater's internal */
        /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */
index 7afa87c7ff5437d875182a552b9e1a928a38bd92..f804e13b002e97cb749e976c6c900244920e6fd6 100644 (file)
@@ -50,6 +50,7 @@
 #define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal_MASK                                               0x00000001L
 #define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK                                           0x00000002L
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK                                             0x0000003CL
+#define ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK                                   0x0000007CL
 #define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK                                             0x00000E00L
 #define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK                                             0xFFFFF000L
 
index 3534686670362964e61ad3037982b23e414f341c..e2d13131a432c1d9909077e18f81200308bbb18b 100644 (file)
@@ -350,6 +350,7 @@ struct amd_pm_funcs {
                                   unsigned int *num_states);
        int (*get_dpm_clock_table)(void *handle,
                                   struct dpm_clocks *clock_table);
+       int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size);
 };
 
 struct metrics_table_header {
@@ -473,6 +474,68 @@ struct gpu_metrics_v1_1 {
        uint16_t                        temperature_hbm[NUM_HBM_INSTANCES];
 };
 
+struct gpu_metrics_v1_2 {
+       struct metrics_table_header     common_header;
+
+       /* Temperature */
+       uint16_t                        temperature_edge;
+       uint16_t                        temperature_hotspot;
+       uint16_t                        temperature_mem;
+       uint16_t                        temperature_vrgfx;
+       uint16_t                        temperature_vrsoc;
+       uint16_t                        temperature_vrmem;
+
+       /* Utilization */
+       uint16_t                        average_gfx_activity;
+       uint16_t                        average_umc_activity; // memory controller
+       uint16_t                        average_mm_activity; // UVD or VCN
+
+       /* Power/Energy */
+       uint16_t                        average_socket_power;
+       uint64_t                        energy_accumulator;
+
+       /* Driver attached timestamp (in ns) */
+       uint64_t                        system_clock_counter;
+
+       /* Average clocks */
+       uint16_t                        average_gfxclk_frequency;
+       uint16_t                        average_socclk_frequency;
+       uint16_t                        average_uclk_frequency;
+       uint16_t                        average_vclk0_frequency;
+       uint16_t                        average_dclk0_frequency;
+       uint16_t                        average_vclk1_frequency;
+       uint16_t                        average_dclk1_frequency;
+
+       /* Current clocks */
+       uint16_t                        current_gfxclk;
+       uint16_t                        current_socclk;
+       uint16_t                        current_uclk;
+       uint16_t                        current_vclk0;
+       uint16_t                        current_dclk0;
+       uint16_t                        current_vclk1;
+       uint16_t                        current_dclk1;
+
+       /* Throttle status */
+       uint32_t                        throttle_status;
+
+       /* Fans */
+       uint16_t                        current_fan_speed;
+
+       /* Link width/speed */
+       uint16_t                        pcie_link_width;
+       uint16_t                        pcie_link_speed; // in 0.1 GT/s
+
+       uint16_t                        padding;
+
+       uint32_t                        gfx_activity_acc;
+       uint32_t                        mem_activity_acc;
+
+       uint16_t                        temperature_hbm[NUM_HBM_INSTANCES];
+
+       /* PMFW attached timestamp (10ns resolution) */
+       uint64_t                        firmware_timestamp;
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
index 9a54066ec0af706ea1315e0cafb677574791d9a2..13da377888d2c504f5811f73582e8cf7e6444309 100644 (file)
@@ -735,6 +735,23 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  * - a list of valid ranges for sclk, mclk, and voltage curve points
  *   labeled OD_RANGE
  *
+ * < For APUs >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ *
+ * - a list of valid ranges for sclk labeled OD_RANGE
+ *
+ * < For VanGogh >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ * - minimum and maximum core clocks labeled OD_CCLK
+ *
+ * - a list of valid ranges for sclk and cclk labeled OD_RANGE
+ *
  * To manually adjust these settings:
  *
  * - First select manual using power_dpm_force_performance_level
@@ -743,7 +760,10 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
  *   string that contains "s/m index clock" to the file. The index
  *   should be 0 if to set minimum clock. And 1 if to set maximum
  *   clock. E.g., "s 0 500" will update minimum sclk to be 500 MHz.
- *   "m 1 800" will update maximum mclk to be 800Mhz.
+ *   "m 1 800" will update maximum mclk to be 800Mhz. For core
+ *   clocks on VanGogh, the string contains "p core index clock".
+ *   E.g., "p 2 0 800" would set the minimum core clock on core
+ *   2 to 800Mhz.
  *
  *   For sclk voltage curve, enter the new values by writing a
  *   string that contains "vc point clock voltage" to the file. The
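A small userspace sketch of the write interface documented above; the sysfs path and card index are assumptions (card0 here), and manual mode must already be selected via power_dpm_force_performance_level as described:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path; the card index depends on the system. */
	const char *path = "/sys/class/drm/card0/device/pp_od_clk_voltage";
	int fd = open(path, O_WRONLY);
	int ret = 0;

	if (fd < 0)
		return 1;

	/* "p core index clock": minimum (index 0) core clock of core 2 -> 800 MHz on VanGogh. */
	if (write(fd, "p 2 0 800", strlen("p 2 0 800")) < 0)
		ret = 1;

	/* Commit the new settings. */
	if (!ret && write(fd, "c", 1) < 0)
		ret = 1;

	close(fd);
	return ret;
}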
@@ -3534,6 +3554,45 @@ out:
 
 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_pm_info);
 
+/*
+ * amdgpu_pm_prv_buffer_read - Read memory region allocated to FW
+ *
+ * Reads the debug memory region allocated to the PMFW
+ */
+static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,
+                                        size_t size, loff_t *pos)
+{
+       struct amdgpu_device *adev = file_inode(f)->i_private;
+       const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+       void *pp_handle = adev->powerplay.pp_handle;
+       size_t smu_prv_buf_size;
+       void *smu_prv_buf;
+
+       if (amdgpu_in_reset(adev))
+               return -EPERM;
+       if (adev->in_suspend && !adev->in_runpm)
+               return -EPERM;
+
+       if (pp_funcs && pp_funcs->get_smu_prv_buf_details)
+               pp_funcs->get_smu_prv_buf_details(pp_handle, &smu_prv_buf,
+                                                 &smu_prv_buf_size);
+       else
+               return -ENOSYS;
+
+       if (!smu_prv_buf || !smu_prv_buf_size)
+               return -EINVAL;
+
+       return simple_read_from_buffer(buf, size, pos, smu_prv_buf,
+                                      smu_prv_buf_size);
+}
+
+static const struct file_operations amdgpu_debugfs_pm_prv_buffer_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = amdgpu_pm_prv_buffer_read,
+       .llseek = default_llseek,
+};
+
 #endif
 
 void amdgpu_debugfs_pm_init(struct amdgpu_device *adev)
@@ -3545,5 +3604,10 @@ void amdgpu_debugfs_pm_init(struct amdgpu_device *adev)
        debugfs_create_file("amdgpu_pm_info", 0444, root, adev,
                            &amdgpu_debugfs_pm_info_fops);
 
+       if (adev->pm.smu_prv_buffer_size > 0)
+               debugfs_create_file_size("amdgpu_pm_prv_buffer", 0444, root,
+                                        adev,
+                                        &amdgpu_debugfs_pm_prv_buffer_fops,
+                                        adev->pm.smu_prv_buffer_size);
 #endif
 }
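A small sketch of reading the new debugfs blob from userspace; the debugfs mount point and DRI minor number (0 here) are assumptions, and root is required:

#include <stdio.h>

int main(void)
{
	/* Assumed path: debugfs mounted at /sys/kernel/debug, GPU is DRI minor 0. */
	FILE *f = fopen("/sys/kernel/debug/dri/0/amdgpu_pm_prv_buffer", "rb");
	unsigned char buf[4096];
	size_t n, i;

	if (!f)
		return 1;

	/* Hex-dump the PMFW debug region, 16 bytes per line. */
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		for (i = 0; i < n; i++)
			printf("%02x%c", buf[i], (i % 16 == 15) ? '\n' : ' ');

	putchar('\n');
	fclose(f);
	return 0;
}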
index d23533bda0026dd2ec77261930554dfdbb6fe35e..a017983ff1fabe82180c37caa0a9914f9e668b12 100644 (file)
@@ -64,7 +64,7 @@
 #define FEATURE_SMUIO_CG_BIT            28
 #define FEATURE_THM_CG_BIT              29
 #define FEATURE_CLK_CG_BIT              30
-#define FEATURE_SPARE_31_BIT            31
+#define FEATURE_EDC_BIT                 31
 #define FEATURE_SPARE_32_BIT            32
 #define FEATURE_SPARE_33_BIT            33
 #define FEATURE_SPARE_34_BIT            34
@@ -439,8 +439,11 @@ typedef struct {
   int8_t   XgmiOffset;     // in Amps
   uint8_t  Padding_TelemetryXgmi;
 
+  uint16_t  EdcPowerLimit;
+  uint16_t  spare6;
+
   //reserved
-  uint32_t reserved[15];
+  uint32_t reserved[14];
 
 } PPTable_t;
 
index 8145e1cbf181eeb87318687920cb6ecd04f6081c..1687709507b3d2f2fabc75b38e58f694df03622c 100644 (file)
@@ -26,7 +26,7 @@
 #include "amdgpu_smu.h"
 
 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
-#define SMU13_DRIVER_IF_VERSION_ALDE 0x6
+#define SMU13_DRIVER_IF_VERSION_ALDE 0x07
 
 /* MP Apertures */
 #define MP0_Public                     0x03800000
index ee6340c6f921e51f89db050ddfe9fab1f70c4502..c73504e998e54ae3ecaec8d6a1dcb45f744e220a 100644 (file)
@@ -1651,6 +1651,26 @@ static int pp_gfx_state_change_set(void *handle, uint32_t state)
        return 0;
 }
 
+static int pp_get_prv_buffer_details(void *handle, void **addr, size_t *size)
+{
+       struct pp_hwmgr *hwmgr = handle;
+       struct amdgpu_device *adev = hwmgr->adev;
+
+       if (!addr || !size)
+               return -EINVAL;
+
+       *addr = NULL;
+       *size = 0;
+       mutex_lock(&hwmgr->smu_lock);
+       if (adev->pm.smu_prv_buffer) {
+               amdgpu_bo_kmap(adev->pm.smu_prv_buffer, addr);
+               *size = adev->pm.smu_prv_buffer_size;
+       }
+       mutex_unlock(&hwmgr->smu_lock);
+
+       return 0;
+}
+
 static const struct amd_pm_funcs pp_dpm_funcs = {
        .load_firmware = pp_dpm_load_fw,
        .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
@@ -1714,4 +1734,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
        .set_xgmi_pstate = pp_set_xgmi_pstate,
        .get_gpu_metrics = pp_get_gpu_metrics,
        .gfx_state_change_set = pp_gfx_state_change_set,
+       .get_smu_prv_buf_details = pp_get_prv_buffer_details,
 };
index b1038d30c8dcc558935d493f5567269532092093..f503e61faa6008f588c9bc243599238cb3cb7269 100644 (file)
@@ -275,7 +275,7 @@ static const ATOM_VOLTAGE_OBJECT_V3 *atomctrl_lookup_voltage_type_v3(
 }
 
 /**
- * atomctrl_get_memory_pll_dividers_si().
+ * atomctrl_get_memory_pll_dividers_si
  *
  * @hwmgr:           input parameter: pointer to HwMgr
  * @clock_value:     input parameter: memory clock
@@ -328,7 +328,7 @@ int atomctrl_get_memory_pll_dividers_si(
 }
 
 /**
- * atomctrl_get_memory_pll_dividers_vi().
+ * atomctrl_get_memory_pll_dividers_vi
  *
  * @hwmgr:                 input parameter: pointer to HwMgr
  * @clock_value:           input parameter: memory clock
@@ -1104,7 +1104,7 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 }
 
 /**
- * atomctrl_get_voltage_evv_on_sclk gets voltage via call to ATOM COMMAND table.
+ * atomctrl_get_voltage_evv_on_sclk: gets voltage via call to ATOM COMMAND table.
  * @hwmgr:              input: pointer to hwManager
  * @voltage_type:       input: type of EVV voltage VDDC or VDDGFX
  * @sclk:               input: in 10Khz unit. DPM state SCLK frequency
@@ -1144,7 +1144,7 @@ int atomctrl_get_voltage_evv_on_sclk(
 }
 
 /**
- * atomctrl_get_voltage_evv gets voltage via call to ATOM COMMAND table.
+ * atomctrl_get_voltage_evv: gets voltage via call to ATOM COMMAND table.
  * @hwmgr:              input: pointer to hwManager
  * @virtual_voltage_id: input: voltage id which match per voltage DPM state: 0xff01, 0xff02.. 0xff08
  * @voltage:          output: real voltage level in unit of mv
index 2a28c9df15a02070eddaf764b5e526214df1db1f..8d99c7a5abf8868d988d03a0f040f1c60b7066b0 100644 (file)
@@ -85,7 +85,7 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
                return 0;
 
        if (state == BACO_STATE_IN) {
-               if (!ras || !ras->supported) {
+               if (!ras || !adev->ras_enabled) {
                        data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
                        data |= 0x80000000;
                        WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
index c29d8b3131b7eaaf2cecf04313f6d8a7517e4168..32dadeee9514895cd191f7a3e9852de524794e4a 100644 (file)
@@ -2933,6 +2933,26 @@ int smu_set_light_sbr(struct smu_context *smu, bool enable)
        return ret;
 }
 
+static int smu_get_prv_buffer_details(void *handle, void **addr, size_t *size)
+{
+       struct smu_context *smu = handle;
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct smu_table *memory_pool = &smu_table->memory_pool;
+
+       if (!addr || !size)
+               return -EINVAL;
+
+       *addr = NULL;
+       *size = 0;
+       mutex_lock(&smu->mutex);
+       if (memory_pool->bo) {
+               *addr = memory_pool->cpu_addr;
+               *size = memory_pool->size;
+       }
+       mutex_unlock(&smu->mutex);
+
+       return 0;
+}
 
 static const struct amd_pm_funcs swsmu_pm_funcs = {
        /* export for sysfs */
@@ -2984,6 +3004,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
        .get_max_sustainable_clocks_by_dc    = smu_get_max_sustainable_clocks_by_dc,
        .load_firmware           = smu_load_microcode,
        .gfx_state_change_set    = smu_gfx_state_change_set,
+       .get_smu_prv_buf_details = smu_get_prv_buffer_details,
 };
 
 int smu_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event,
index d2fd44b903ca415274756d7d7bcd634c82950717..270b2b0b8e8a415acf1ecfb41adc7c1b9712a15b 100644 (file)
@@ -302,7 +302,7 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context *smu,
        if (smu->dc_controlled_by_gpio)
        *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT);
 
-       if (amdgpu_aspm == 1)
+       if (amdgpu_aspm)
                *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT);
 
        return 0;
index 6274cae4a065e415f156be6619c2cd5e9647dec7..a06e6865507d32555d14ca134aa14ea6734108bc 100644 (file)
@@ -1531,7 +1531,8 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
                                                                      NULL);
                        break;
                default:
-                       if (!ras || !ras->supported || adev->gmc.xgmi.pending_reset) {
+                       if (!ras || !adev->ras_enabled ||
+                           adev->gmc.xgmi.pending_reset) {
                                if (adev->asic_type == CHIP_ARCTURUS) {
                                        data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);
                                        data |= 0x80000000;
index dcbe3a72da093a19322396e4746abee5d61ad228..5d04a1dfdfd8ef46eb4ac965f46804f8285bf181 100644 (file)
@@ -209,7 +209,7 @@ static int aldebaran_tables_init(struct smu_context *smu)
                return -ENOMEM;
        smu_table->metrics_time = 0;
 
-       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_1);
+       smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_2);
        smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
        if (!smu_table->gpu_metrics_table) {
                kfree(smu_table->metrics_table);
@@ -1632,8 +1632,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
                                         void **table)
 {
        struct smu_table_context *smu_table = &smu->smu_table;
-       struct gpu_metrics_v1_1 *gpu_metrics =
-               (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
+       struct gpu_metrics_v1_2 *gpu_metrics =
+               (struct gpu_metrics_v1_2 *)smu_table->gpu_metrics_table;
        SmuMetrics_t metrics;
        int i, ret = 0;
 
@@ -1643,7 +1643,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
        if (ret)
                return ret;
 
-       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
+       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 2);
 
        gpu_metrics->temperature_edge = metrics.TemperatureEdge;
        gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -1657,7 +1657,9 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
        gpu_metrics->average_mm_activity = 0;
 
        gpu_metrics->average_socket_power = metrics.AverageSocketPower;
-       gpu_metrics->energy_accumulator = 0;
+       gpu_metrics->energy_accumulator =
+                       (uint64_t)metrics.EnergyAcc64bitHigh << 32 |
+                       metrics.EnergyAcc64bitLow;
 
        gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
        gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
@@ -1688,9 +1690,12 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
        for (i = 0; i < NUM_HBM_INSTANCES; i++)
                gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i];
 
+       gpu_metrics->firmware_timestamp = ((uint64_t)metrics.TimeStampHigh << 32) |
+                                       metrics.TimeStampLow;
+
        *table = (void *)gpu_metrics;
 
-       return sizeof(struct gpu_metrics_v1_1);
+       return sizeof(struct gpu_metrics_v1_2);
 }
 
 static int aldebaran_mode2_reset(struct smu_context *smu)
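
The two new assignments above stitch 32-bit SMU counter halves (EnergyAcc64bitHigh/Low, TimeStampHigh/Low) into 64-bit metrics fields. A self-contained sketch of that pattern; the helper name is illustrative, not a driver symbol:

    #include <stdint.h>
    #include <stdio.h>

    /* Combine two 32-bit halves into one 64-bit value. The cast before the
     * shift keeps the high word from being evaluated in 32-bit arithmetic. */
    static uint64_t combine_hi_lo(uint32_t hi, uint32_t lo)
    {
            return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
            /* 0x00000001:80000000 -> 0x180000000 */
            printf("0x%llx\n",
                   (unsigned long long)combine_hi_lo(0x1u, 0x80000000u));
            return 0;
    }
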
index dc7d2e71aa6fd9265a577287836fc172c4d6a002..0934e5b3aa171f5ad0f56b38a4e7a44cb982b55d 100644 (file)
@@ -104,8 +104,8 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
 
        ret = smu_cmn_wait_for_response(smu);
        if (ret != 0x1) {
-               dev_err(adev->dev, "Msg issuing pre-check failed and "
-                      "SMU may be not in the right state!\n");
+               dev_err(adev->dev, "Msg issuing pre-check failed (0x%x) and "
+                      "SMU may not be in the right state!\n", ret);
                if (ret != -ETIME)
                        ret = -EIO;
                return ret;
@@ -761,6 +761,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
        case METRICS_VERSION(1, 1):
                structure_size = sizeof(struct gpu_metrics_v1_1);
                break;
+       case METRICS_VERSION(1, 2):
+               structure_size = sizeof(struct gpu_metrics_v1_2);
+               break;
        case METRICS_VERSION(2, 0):
                structure_size = sizeof(struct gpu_metrics_v2_0);
                break;
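
smu_cmn_init_soft_gpu_metrics() sizes the table by dispatching on a packed (format, content) revision key, which is why adding gpu_metrics_v1_2 is a one-case change. A hedged sketch of that dispatch under assumed struct sizes; the METRICS_VERSION definition below illustrates the packing and is not necessarily the driver's exact macro:

    #include <stddef.h>
    #include <stdint.h>

    #define METRICS_VERSION(a, b)  (((a) << 16) | (b))

    /* Placeholder layouts: only the sizes matter for this sketch. */
    struct gpu_metrics_v1_1 { uint8_t payload[96]; };
    struct gpu_metrics_v1_2 { uint8_t payload[112]; };

    static size_t gpu_metrics_size(uint8_t frev, uint8_t crev)
    {
            switch (METRICS_VERSION(frev, crev)) {
            case METRICS_VERSION(1, 1):
                    return sizeof(struct gpu_metrics_v1_1);
            case METRICS_VERSION(1, 2):        /* new in this series */
                    return sizeof(struct gpu_metrics_v1_2);
            default:
                    return 0;                  /* unknown revision */
            }
    }
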
index cc445c4cba2e3dc0effd366256cb30df210455bb..46eea01950cb13b62b93200783800e3cf07ebd72 100644 (file)
@@ -1098,7 +1098,8 @@ static bool radeon_check_pot_argument(int arg)
 }
 
 /**
- * Determine a sensible default GART size according to ASIC family.
+ * radeon_gart_size_auto - Determine a sensible default GART size
+ *                         according to ASIC family.
  *
  * @family: ASIC family name
  */
index 728566542f8a1ed9d7ab359d4f2fb686dad85575..2063a1c10f793b1de20909c041b43d7fb87a853e 100644 (file)
@@ -116,8 +116,6 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_CPU_GTT_USWC         (1 << 2)
 /* Flag that the memory should be in VRAM and cleared */
 #define AMDGPU_GEM_CREATE_VRAM_CLEARED         (1 << 3)
-/* Flag that create shadow bo(GTT) while allocating vram bo */
-#define AMDGPU_GEM_CREATE_SHADOW               (1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS      (1 << 5)
 /* Flag that BO is always valid in this VM */
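
With the SHADOW bit gone, userspace keeps allocating BOs with the remaining public flags exactly as before. A hedged userspace sketch using the existing GEM create ioctl; the render-node path and the chosen flags are assumptions for illustration, and the header may live under libdrm/ rather than drm/ depending on the installation:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <drm/amdgpu_drm.h>     /* or <libdrm/amdgpu_drm.h> */

    int main(void)
    {
            int fd = open("/dev/dri/renderD128", O_RDWR);   /* assumed node */
            union drm_amdgpu_gem_create args = {0};

            if (fd < 0)
                    return 1;

            args.in.bo_size = 1 << 20;                      /* 1 MiB */
            args.in.alignment = 4096;
            args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
            args.in.domain_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED |
                                   AMDGPU_GEM_CREATE_NO_CPU_ACCESS;

            if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args) == 0)
                    printf("BO handle %u\n", args.out.handle);
            return 0;
    }
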
index bf5e7d7846dd534aba838c3671142b21a645a397..3cb5b5dd9f77e710006dbe26c40a5de46e8bd1b6 100644 (file)
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
  * - 1.4 - Indicate new SRAM EDC bit in device properties
+ * - 1.5 - Add SVM API
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 4
+#define KFD_IOCTL_MINOR_VERSION 5
 
 struct kfd_ioctl_get_version_args {
        __u32 major_version;    /* from KFD */
@@ -473,6 +474,167 @@ enum kfd_mmio_remap {
        KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
 };
 
+/* Guarantee host access to memory */
+#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001
+/* Fine grained coherency between all devices with access */
+#define KFD_IOCTL_SVM_FLAG_COHERENT    0x00000002
+/* Use any GPU in same hive as preferred device */
+#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL  0x00000004
+/* GPUs only read, allows replication */
+#define KFD_IOCTL_SVM_FLAG_GPU_RO      0x00000008
+/* Allow execution on GPU */
+#define KFD_IOCTL_SVM_FLAG_GPU_EXEC    0x00000010
+/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */
+#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
+
+/**
+ * kfd_ioctl_svm_op - SVM ioctl operations
+ *
+ * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes
+ * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes
+ */
+enum kfd_ioctl_svm_op {
+       KFD_IOCTL_SVM_OP_SET_ATTR,
+       KFD_IOCTL_SVM_OP_GET_ATTR
+};
+
+/**
+ * kfd_ioctl_svm_location - Enum for preferred and prefetch locations
+ *
+ * GPU IDs are used to specify GPUs as preferred and prefetch locations.
+ * Below definitions are used for system memory or for leaving the preferred
+ * location unspecified.
+ */
+enum kfd_ioctl_svm_location {
+       KFD_IOCTL_SVM_LOCATION_SYSMEM = 0,
+       KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff
+};
+
+/**
+ * kfd_ioctl_svm_attr_type - SVM attribute types
+ *
+ * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for
+ *                                    system memory
+ * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for
+ *                                   system memory. Setting this triggers an
+ *                                   immediate prefetch (migration).
+ * @KFD_IOCTL_SVM_ATTR_ACCESS:
+ * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+ * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given
+ *                                by the attribute value
+ * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see
+ *                                KFD_IOCTL_SVM_FLAG_...)
+ * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear
+ * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity
+ *                                  (log2 num pages)
+ */
+enum kfd_ioctl_svm_attr_type {
+       KFD_IOCTL_SVM_ATTR_PREFERRED_LOC,
+       KFD_IOCTL_SVM_ATTR_PREFETCH_LOC,
+       KFD_IOCTL_SVM_ATTR_ACCESS,
+       KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE,
+       KFD_IOCTL_SVM_ATTR_NO_ACCESS,
+       KFD_IOCTL_SVM_ATTR_SET_FLAGS,
+       KFD_IOCTL_SVM_ATTR_CLR_FLAGS,
+       KFD_IOCTL_SVM_ATTR_GRANULARITY
+};
+
+/**
+ * kfd_ioctl_svm_attribute - Attributes as pairs of type and value
+ *
+ * The meaning of the @value depends on the attribute type.
+ *
+ * @type: attribute type (see enum @kfd_ioctl_svm_attr_type)
+ * @value: attribute value
+ */
+struct kfd_ioctl_svm_attribute {
+       __u32 type;
+       __u32 value;
+};
+
+/**
+ * kfd_ioctl_svm_args - Arguments for SVM ioctl
+ *
+ * @op specifies the operation to perform (see enum
+ * @kfd_ioctl_svm_op).  @start_addr and @size are common for all
+ * operations.
+ *
+ * A variable number of attributes can be given in @attrs.
+ * @nattr specifies the number of attributes. New attributes can be
+ * added in the future without breaking the ABI. If unknown attributes
+ * are given, the function returns -EINVAL.
+ *
+ * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address
+ * range. It may overlap existing virtual address ranges. If it does,
+ * the existing ranges will be split such that the attribute changes
+ * only apply to the specified address range.
+ *
+ * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes
+ * over all memory in the given range and returns the result as the
+ * attribute value. If different pages have different preferred or
+ * prefetch locations, 0xffffffff will be returned for
+ * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or
+ * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For
+ * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be
+ * aggregated by bitwise AND. The minimum migration granularity
+ * throughout the range will be returned for
+ * @KFD_IOCTL_SVM_ATTR_GRANULARITY.
+ *
+ * Querying of accessibility attributes works by initializing the
+ * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the
+ * GPUID being queried. Multiple attributes can be given to allow
+ * querying multiple GPUIDs. The ioctl function overwrites the
+ * attribute type to indicate the access for the specified GPU.
+ *
+ * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for
+ * @KFD_IOCTL_SVM_OP_GET_ATTR.
+ */
+struct kfd_ioctl_svm_args {
+       __u64 start_addr;
+       __u64 size;
+       __u32 op;
+       __u32 nattr;
+       /* Variable length array of attributes */
+       struct kfd_ioctl_svm_attribute attrs[0];
+};
+
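
A hedged userspace sketch of the SET_ATTR path documented above: one call marks a range host-accessible and GPU-executable and sets a preferred location. The gpuid and the already-open /dev/kfd descriptor are placeholders; real code reads gpuids from the KFD topology:

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>    /* installed uapi header, path assumed */

    int svm_set_range_attrs(int kfd_fd, void *addr, unsigned long size,
                            unsigned int gpuid)
    {
            const unsigned int nattr = 2;
            struct kfd_ioctl_svm_args *args;
            int ret;

            /* Variable-length attrs[] follows the fixed part of the args. */
            args = calloc(1, sizeof(*args) +
                             nattr * sizeof(struct kfd_ioctl_svm_attribute));
            if (!args)
                    return -1;

            args->start_addr = (unsigned long)addr;
            args->size = size;
            args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
            args->nattr = nattr;
            args->attrs[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
            args->attrs[0].value = gpuid;
            args->attrs[1].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
            args->attrs[1].value = KFD_IOCTL_SVM_FLAG_HOST_ACCESS |
                                   KFD_IOCTL_SVM_FLAG_GPU_EXEC;

            ret = ioctl(kfd_fd, AMDKFD_IOC_SVM, args);
            free(args);
            return ret;
    }
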
+/**
+ * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode
+ *
+ * @xnack_enabled:       [in/out] Whether to enable XNACK mode for this process
+ *
+ * @xnack_enabled indicates whether recoverable page faults should be
+ * enabled for the current process. 0 means disabled, positive means
+ * enabled, negative means leave unchanged. If enabled, virtual address
+ * translations on GFXv9 and later AMD GPUs can return XNACK and retry
+ * the access until a valid PTE is available. This is used to implement
+ * device page faults.
+ *
+ * On output, @xnack_enabled returns the (new) current mode (0 or
+ * positive). Therefore, a negative input value can be used to query
+ * the current mode without changing it.
+ *
+ * The XNACK mode fundamentally changes the way SVM managed memory works
+ * in the driver, with subtle effects on application performance and
+ * functionality.
+ *
+ * Enabling XNACK mode requires shader programs to be compiled
+ * differently. Furthermore, not all GPUs support changing the mode
+ * per-process. Therefore changing the mode is only allowed while no
+ * user mode queues exist in the process. This ensures that no shader
+ * code is running that may have been compiled for the wrong mode. GPUs
+ * that cannot change to the requested mode will prevent the switch.
+ * All GPUs used by the process must be in the
+ * same XNACK mode.
+ *
+ * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
+ * Therefore those GPUs are not considered for the XNACK mode switch.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_set_xnack_mode_args {
+       __s32 xnack_enabled;
+};
+
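
As described above, a negative input queries the current mode without changing it. A minimal sketch of that query, assuming kfd_fd is an open /dev/kfd descriptor:

    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>    /* installed uapi header, path assumed */

    int kfd_get_xnack_mode(int kfd_fd, int *enabled)
    {
            struct kfd_ioctl_set_xnack_mode_args args = { .xnack_enabled = -1 };
            int ret = ioctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &args);

            if (!ret)
                    *enabled = args.xnack_enabled;  /* 0 = off, positive = on */
            return ret;
    }
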
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -573,7 +735,12 @@ enum kfd_mmio_remap {
 #define AMDKFD_IOC_SMI_EVENTS                  \
                AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
 
+#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
+
+#define AMDKFD_IOC_SET_XNACK_MODE              \
+               AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+
 #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x20
+#define AMDKFD_COMMAND_END             0x22
 
 #endif