Merge branch 'drm-next-4.14' of git://people.freedesktop.org/~agd5f/linux into drm...
author    Dave Airlie <airlied@redhat.com>
          Wed, 2 Aug 2017 02:43:12 +0000 (12:43 +1000)
committer Dave Airlie <airlied@redhat.com>
          Wed, 2 Aug 2017 02:43:12 +0000 (12:43 +1000)
- Stop reprogramming the MC; the vbios already does this in asic_init
- Reduce internal gart to 256M (this does not affect the ttm GTT pool size; see the sketch after this list)
- Initial support for huge pages
- Rework bo migration logic
- Lots of improvements for vega10
- Powerplay fixes
- Additional Raven enablement
- SR-IOV improvements
- Bug fixes
- Code cleanup
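
A minimal sketch of the new size handling, mirroring the
amdgpu_check_arguments() hunk in amdgpu_device.c below (units are MiB;
the two knobs are now independent, so shrinking the GART aperture
leaves the TTM GTT pool alone):

        /* gart_mb: in-VRAM hardware GART aperture, must be >= 32 MiB.
         * gtt_mb:  TTM GTT pool size, or -1 for the driver default.
         */
        static void check_sizes(unsigned *gart_mb, int *gtt_mb)
        {
                if (*gart_mb < 32)
                        *gart_mb = 32;  /* clamp, as the driver warns and does */
                if (*gtt_mb != -1 && *gtt_mb < 32)
                        *gtt_mb = -1;   /* fall back to the default GTT pool */
        }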

* 'drm-next-4.14' of git://people.freedesktop.org/~agd5f/linux: (138 commits)
  drm/amdgpu: fix header on gfx9 clear state
  drm/amdgpu: reduce the time of reading VBIOS
  drm/amdgpu/virtual_dce: Remove the rmmod error message
  drm/amdgpu/gmc9: disable legacy vga features in gmc init
  drm/amdgpu/gmc8: disable legacy vga features in gmc init
  drm/amdgpu/gmc7: disable legacy vga features in gmc init
  drm/amdgpu/gmc6: disable legacy vga features in gmc init (v2)
  drm/radeon: Set depth on low mem to 16 bpp instead of 8 bpp
  drm/amdgpu: fix the incorrect scratch reg number on gfx v6
  drm/amdgpu: fix the incorrect scratch reg number on gfx v7
  drm/amdgpu: fix the incorrect scratch reg number on gfx v8
  drm/amdgpu: fix the incorrect scratch reg number on gfx v9
  drm/amd/powerplay: add support for 3DP 4K@120Hz on vega10.
  drm/amdgpu: enable huge page handling in the VM v5
  drm/amdgpu: increase fragmentation size for Vega10 v2
  drm/amdgpu: ttm_bind only when user needs gpu_addr in bo pin
  drm/amdgpu: correct clock info for SRIOV
  drm/amdgpu/gmc8: SRIOV need to program fb location
  drm/amdgpu: disable firmware loading for psp v10
  drm/amdgpu:fix gfx fence allocate size
  ...

110 files changed:
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/cik.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/dce_virtual.c
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/si.c
drivers/gpu/drm/amd/amdgpu/si_dpm.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdgpu/soc15_common.h
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/include/cgs_common.h
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
drivers/gpu/drm/amd/powerplay/inc/pp_debug.h
drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h
drivers/gpu/drm/amd/powerplay/inc/smu9.h
drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
drivers/gpu/drm/amd/powerplay/inc/smumgr.h
drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c
drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h
drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c
drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h
drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/gpu/drm/radeon/radeon_irq_kms.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/vce_v2_0.c
drivers/gpu/drm/ttm/ttm_bo_vm.c
include/drm/ttm/ttm_bo_driver.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index faea6349228fe4a566464eaa467e1a77eaf5fd4d..658bac0cdc5e96094499a13c2183de9f56e6f12f 100644
@@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
        amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
        amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-       amdgpu_queue_mgr.o
+       amdgpu_queue_mgr.o amdgpu_vf_error.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ff7bf1a9f96780408027d4273101666df7335522..51d1364cf1856f0d6e3a97820f96db7ebc3c1879 100644
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
+#include "amdgpu_gart.h"
 
 /*
  * Modules parameters.
  */
 extern int amdgpu_modeset;
 extern int amdgpu_vram_limit;
-extern int amdgpu_gart_size;
+extern int amdgpu_vis_vram_limit;
+extern unsigned amdgpu_gart_size;
+extern int amdgpu_gtt_size;
 extern int amdgpu_moverate;
 extern int amdgpu_benchmarking;
 extern int amdgpu_testing;
@@ -104,6 +107,7 @@ extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
 extern unsigned amdgpu_cg_mask;
 extern unsigned amdgpu_pg_mask;
+extern unsigned amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern unsigned amdgpu_pp_feature_mask;
@@ -531,49 +535,6 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
-/*
- * GART structures, functions & helpers
- */
-struct amdgpu_mc;
-
-#define AMDGPU_GPU_PAGE_SIZE 4096
-#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
-#define AMDGPU_GPU_PAGE_SHIFT 12
-#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
-
-struct amdgpu_gart {
-       dma_addr_t                      table_addr;
-       struct amdgpu_bo                *robj;
-       void                            *ptr;
-       unsigned                        num_gpu_pages;
-       unsigned                        num_cpu_pages;
-       unsigned                        table_size;
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
-       struct page                     **pages;
-#endif
-       bool                            ready;
-
-       /* Asic default pte flags */
-       uint64_t                        gart_pte_flags;
-
-       const struct amdgpu_gart_funcs *gart_funcs;
-};
-
-int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
-void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
-int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
-void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
-int amdgpu_gart_init(struct amdgpu_device *adev);
-void amdgpu_gart_fini(struct amdgpu_device *adev);
-int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
-                       int pages);
-int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
-                    int pages, struct page **pagelist,
-                    dma_addr_t *dma_addr, uint64_t flags);
-int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
-
 /*
  * VMHUB structures, functions & helpers
  */
@@ -598,22 +559,20 @@ struct amdgpu_mc {
         * about vram size near mc fb location */
        u64                     mc_vram_size;
        u64                     visible_vram_size;
-       u64                     gtt_size;
-       u64                     gtt_start;
-       u64                     gtt_end;
+       u64                     gart_size;
+       u64                     gart_start;
+       u64                     gart_end;
        u64                     vram_start;
        u64                     vram_end;
        unsigned                vram_width;
        u64                     real_vram_size;
        int                     vram_mtrr;
-       u64                     gtt_base_align;
        u64                     mc_mask;
        const struct firmware   *fw;    /* MC firmware */
        uint32_t                fw_version;
        struct amdgpu_irq_src   vm_fault;
        uint32_t                vram_type;
        uint32_t                srbm_soft_reset;
-       struct amdgpu_mode_mc_save save;
        bool                    prt_warning;
        uint64_t                stolen_size;
        /* apertures */
@@ -1159,7 +1118,9 @@ struct amdgpu_cs_parser {
        struct list_head                validated;
        struct dma_fence                *fence;
        uint64_t                        bytes_moved_threshold;
+       uint64_t                        bytes_moved_vis_threshold;
        uint64_t                        bytes_moved;
+       uint64_t                        bytes_moved_vis;
        struct amdgpu_bo_list_entry     *evictable;
 
        /* user fence */
@@ -1231,7 +1192,9 @@ struct amdgpu_wb {
 int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
 int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb);
+int amdgpu_wb_get_256Bit(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
+void amdgpu_wb_free_256bit(struct amdgpu_device *adev, u32 wb);
 
 void amdgpu_get_pcie_info(struct amdgpu_device *adev);
 
@@ -1557,6 +1520,10 @@ struct amdgpu_device {
        spinlock_t gc_cac_idx_lock;
        amdgpu_rreg_t                   gc_cac_rreg;
        amdgpu_wreg_t                   gc_cac_wreg;
+       /* protects concurrent se_cac register access */
+       spinlock_t se_cac_idx_lock;
+       amdgpu_rreg_t                   se_cac_rreg;
+       amdgpu_wreg_t                   se_cac_wreg;
        /* protects concurrent ENDPOINT (audio) register access */
        spinlock_t audio_endpt_idx_lock;
        amdgpu_block_rreg_t             audio_endpt_rreg;
@@ -1593,6 +1560,7 @@ struct amdgpu_device {
                spinlock_t              lock;
                s64                     last_update_us;
                s64                     accum_us; /* accumulated microseconds */
+               s64                     accum_us_vis; /* for visible VRAM */
                u32                     log2_max_MBps;
        } mm_stats;
 
@@ -1687,6 +1655,8 @@ struct amdgpu_device {
        bool has_hw_reset;
        u8                              reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
+       /* record last mm index being written through WREG32*/
+       unsigned long last_mm_index;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1742,6 +1712,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
 #define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
 #define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
+#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg))
+#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)                               \
@@ -1792,50 +1764,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
 #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
 
-/*
- * RING helpers.
- */
-static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
-{
-       if (ring->count_dw <= 0)
-               DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-       ring->ring[ring->wptr++ & ring->buf_mask] = v;
-       ring->wptr &= ring->ptr_mask;
-       ring->count_dw--;
-}
-
-static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw)
-{
-       unsigned occupied, chunk1, chunk2;
-       void *dst;
-
-       if (unlikely(ring->count_dw < count_dw)) {
-               DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-               return;
-       }
-
-       occupied = ring->wptr & ring->buf_mask;
-       dst = (void *)&ring->ring[occupied];
-       chunk1 = ring->buf_mask + 1 - occupied;
-       chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-       chunk2 = count_dw - chunk1;
-       chunk1 <<= 2;
-       chunk2 <<= 2;
-
-       if (chunk1)
-               memcpy(dst, src, chunk1);
-
-       if (chunk2) {
-               src += chunk1;
-               dst = (void *)ring->ring;
-               memcpy(dst, src, chunk2);
-       }
-
-       ring->wptr += count_dw;
-       ring->wptr &= ring->ptr_mask;
-       ring->count_dw -= count_dw;
-}
-
 static inline struct amdgpu_sdma_instance *
 amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 {
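
The ring helpers removed above are not lost: amdgpu_ring.h appears in
the changed-file list, consistent with a move rather than a deletion.
The masked-pointer trick they rely on can be modeled stand-alone; this
is an illustrative user-space sketch assuming a power-of-two ring size,
not the in-tree code:

        #include <stdint.h>
        #include <stdio.h>

        #define RING_DW 8u                          /* tiny power-of-two ring */
        static uint32_t ring[RING_DW];
        static uint64_t wptr;                       /* free-running write pointer */

        static void ring_write(uint32_t v)
        {
                ring[wptr++ & (RING_DW - 1)] = v;   /* masking wraps, no branch */
        }

        int main(void)
        {
                for (uint32_t i = 0; i < 10; i++)
                        ring_write(i);
                /* slots 0 and 1 were overwritten on wrap: prints "8 9" */
                printf("%u %u\n", ring[0], ring[1]);
                return 0;
        }
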
@@ -1898,7 +1826,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
-#define amdgpu_display_set_vga_render_state(adev, r) (adev)->mode_info.funcs->set_vga_render_state((adev), (r))
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
 #define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc))
 #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
@@ -1911,8 +1838,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
 #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
-#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
@@ -1927,7 +1852,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+                                 u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
@@ -1943,7 +1869,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                                 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
-void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
+void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1e8e1123ddf416f18176cbc6e82fa791b3df9fb5..ce443586a0c71c13bd36472558473c0beae9b081 100644
@@ -1686,7 +1686,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
 {
        uint32_t bios_6_scratch;
 
-       bios_6_scratch = RREG32(mmBIOS_SCRATCH_6);
+       bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
 
        if (lock) {
                bios_6_scratch |= ATOM_S6_CRITICAL_STATE;
@@ -1696,15 +1696,17 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
                bios_6_scratch |= ATOM_S6_ACC_MODE;
        }
 
-       WREG32(mmBIOS_SCRATCH_6, bios_6_scratch);
+       WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
 void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
 {
        uint32_t bios_2_scratch, bios_6_scratch;
 
-       bios_2_scratch = RREG32(mmBIOS_SCRATCH_2);
-       bios_6_scratch = RREG32(mmBIOS_SCRATCH_6);
+       adev->bios_scratch_reg_offset = mmBIOS_SCRATCH_0;
+
+       bios_2_scratch = RREG32(adev->bios_scratch_reg_offset + 2);
+       bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
 
        /* let the bios control the backlight */
        bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE;
@@ -1715,8 +1717,8 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
        /* clear the vbios dpms state */
        bios_2_scratch &= ~ATOM_S2_DEVICE_DPMS_STATE;
 
-       WREG32(mmBIOS_SCRATCH_2, bios_2_scratch);
-       WREG32(mmBIOS_SCRATCH_6, bios_6_scratch);
+       WREG32(adev->bios_scratch_reg_offset + 2, bios_2_scratch);
+       WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
 void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev)
@@ -1724,7 +1726,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev)
        int i;
 
        for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-               adev->bios_scratch[i] = RREG32(mmBIOS_SCRATCH_0 + i);
+               adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i);
 }
 
 void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
@@ -1738,20 +1740,30 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
        adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
 
        for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-               WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
+               WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
 }
 
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
                                              bool hung)
 {
-       u32 tmp = RREG32(mmBIOS_SCRATCH_3);
+       u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
 
        if (hung)
                tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
        else
                tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
 
-       WREG32(mmBIOS_SCRATCH_3, tmp);
+       WREG32(adev->bios_scratch_reg_offset + 3, tmp);
+}
+
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
+{
+       u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
+
+       if (tmp & ATOM_S7_ASIC_INIT_COMPLETE_MASK)
+               return false;
+       else
+               return true;
 }
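
The new helper encodes the repost handshake: the VBIOS sets
ATOM_S7_ASIC_INIT_COMPLETE_MASK in scratch register 7 once ASIC_Init
has run, so posting is needed only while the bit is clear. An
equivalent condensed body, purely for illustration:

        return !(RREG32(adev->bios_scratch_reg_offset + 7) &
                 ATOM_S7_ASIC_INIT_COMPLETE_MASK);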
 
 /* Atom needs data in little endian format
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 38d0fe32e5cd4218fad2244597930c90465cc7e9..b0d5d1d7fdba15d6674fdfbc744c90592cee8c8c 100644
@@ -200,6 +200,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
                                              bool hung);
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
 
 void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
 int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 4bdda56fcceea36b8229d74ac3b76fe706919a77..f9ffe8ef0cd60a85ae4180f66727f791e8679622 100644
@@ -66,41 +66,6 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
        }
 }
 
-void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev)
-{
-       int i;
-
-       for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-               adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i);
-}
-
-void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev)
-{
-       int i;
-
-       /*
-        * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
-        * execute ASIC_Init posting via driver
-        */
-       adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
-
-       for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-               WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
-}
-
-void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
-                                                 bool hung)
-{
-       u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
-
-       if (hung)
-               tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
-       else
-               tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
-
-       WREG32(adev->bios_scratch_reg_offset + 3, tmp);
-}
-
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 {
        struct atom_context *ctx = adev->mode_info.atom_context;
@@ -130,3 +95,129 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
        ctx->scratch_size_bytes = usage_bytes;
        return 0;
 }
+
+union igp_info {
+       struct atom_integrated_system_info_v1_11 v11;
+};
+
+/*
+ * Return vram width from integrated system info table, if available,
+ * or 0 if not.
+ */
+int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
+{
+       struct amdgpu_mode_info *mode_info = &adev->mode_info;
+       int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+                                               integratedsysteminfo);
+       u16 data_offset, size;
+       union igp_info *igp_info;
+       u8 frev, crev;
+
+       /* get any igp specific overrides */
+       if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+                                  &frev, &crev, &data_offset)) {
+               igp_info = (union igp_info *)
+                       (mode_info->atom_context->bios + data_offset);
+               switch (crev) {
+               case 11:
+                       return igp_info->v11.umachannelnumber * 64;
+               default:
+                       return 0;
+               }
+       }
+
+       return 0;
+}
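
For example, an APU whose integrated system info table reports
umachannelnumber = 2 comes out as 2 * 64 = 128 bits here, on the
implied assumption that each UMA channel is 64 bits wide.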
+
+union firmware_info {
+       struct atom_firmware_info_v3_1 v31;
+};
+
+union smu_info {
+       struct atom_smu_info_v3_1 v31;
+};
+
+union umc_info {
+       struct atom_umc_info_v3_1 v31;
+};
+
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
+{
+       struct amdgpu_mode_info *mode_info = &adev->mode_info;
+       struct amdgpu_pll *spll = &adev->clock.spll;
+       struct amdgpu_pll *mpll = &adev->clock.mpll;
+       uint8_t frev, crev;
+       uint16_t data_offset;
+       int ret = -EINVAL, index;
+
+       index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+                                           firmwareinfo);
+       if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+                                  &frev, &crev, &data_offset)) {
+               union firmware_info *firmware_info =
+                       (union firmware_info *)(mode_info->atom_context->bios +
+                                               data_offset);
+
+               adev->clock.default_sclk =
+                       le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+               adev->clock.default_mclk =
+                       le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+
+               adev->pm.current_sclk = adev->clock.default_sclk;
+               adev->pm.current_mclk = adev->clock.default_mclk;
+
+               /* not technically a clock, but... */
+               adev->mode_info.firmware_flags =
+                       le32_to_cpu(firmware_info->v31.firmware_capability);
+
+               ret = 0;
+       }
+
+       index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+                                           smu_info);
+       if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+                                  &frev, &crev, &data_offset)) {
+               union smu_info *smu_info =
+                       (union smu_info *)(mode_info->atom_context->bios +
+                                          data_offset);
+
+               /* system clock */
+               spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+
+               spll->reference_div = 0;
+               spll->min_post_div = 1;
+               spll->max_post_div = 1;
+               spll->min_ref_div = 2;
+               spll->max_ref_div = 0xff;
+               spll->min_feedback_div = 4;
+               spll->max_feedback_div = 0xff;
+               spll->best_vco = 0;
+
+               ret = 0;
+       }
+
+       index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+                                           umc_info);
+       if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+                                  &frev, &crev, &data_offset)) {
+               union umc_info *umc_info =
+                       (union umc_info *)(mode_info->atom_context->bios +
+                                          data_offset);
+
+               /* memory clock */
+               mpll->reference_freq = le32_to_cpu(umc_info->v31.mem_refclk_10khz);
+
+               mpll->reference_div = 0;
+               mpll->min_post_div = 1;
+               mpll->max_post_div = 1;
+               mpll->min_ref_div = 2;
+               mpll->max_ref_div = 0xff;
+               mpll->min_feedback_div = 4;
+               mpll->max_feedback_div = 0xff;
+               mpll->best_vco = 0;
+
+               ret = 0;
+       }
+
+       return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index a2c3ebe22c713aeacff5036d431ffc1e157f5f7e..288b97e543478b48f2298cf3e442b03f99a91d69 100644
 
 bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
-void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev);
-void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev);
-void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev,
-                                                 bool hung);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 1beae5b930d0e10d407501708a0585e56001181e..2fb299afc12b7e9a51c853b5ff1b048172856e4d 100644
@@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
        for (i = 0; i < n; i++) {
                struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
                r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
-                                      false);
+                                      false, false);
                if (r)
                        goto exit_do_move;
                r = dma_fence_wait(fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 365e735f664744042cec72b939a06078444a69ae..c21adf60a7f200ba6a4faaa32c163157d00da9ce 100644
@@ -86,19 +86,6 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
        return false;
 }
 
-static bool is_atom_fw(uint8_t *bios)
-{
-       uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8);
-       uint8_t frev = bios[bios_header_start + 2];
-       uint8_t crev = bios[bios_header_start + 3];
-
-       if ((frev < 3) ||
-           ((frev == 3) && (crev < 3)))
-               return false;
-
-       return true;
-}
-
 /* If you boot an IGP board with a discrete card as the primary,
  * the IGP rom is not accessible via the rom bar as the IGP rom is
  * part of the system bios.  On boot, the system bios puts a
@@ -117,7 +104,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
 
        adev->bios = NULL;
        vram_base = pci_resource_start(adev->pdev, 0);
-       bios = ioremap(vram_base, size);
+       bios = ioremap_wc(vram_base, size);
        if (!bios) {
                return false;
        }
@@ -455,6 +442,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
        return false;
 
 success:
-       adev->is_atom_fw = is_atom_fw(adev->bios);
+       adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false;
        return true;
 }
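
The removed is_atom_fw() sniffed the BIOS header version, treating
anything below frev 3, crev 3 as legacy atombios; the replacement
gates purely on ASIC type, since Vega10 and newer are the parts that
ship atomfirmware tables.
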
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index f621ee115c98d4e5d5a4faa0d845864dc65a0b8c..d324e1c240280d23ae26455cfb321561b0cf0494 100644
@@ -83,7 +83,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
        r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
        mutex_unlock(&fpriv->bo_list_lock);
        if (r < 0) {
-               kfree(list);
+               amdgpu_bo_list_free(list);
                return r;
        }
        *id = r;
@@ -198,12 +198,16 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
        result = idr_find(&fpriv->bo_list_handles, id);
 
        if (result) {
-               if (kref_get_unless_zero(&result->refcount))
+               if (kref_get_unless_zero(&result->refcount)) {
+                       rcu_read_unlock();
                        mutex_lock(&result->lock);
-               else
+               } else {
+                       rcu_read_unlock();
                        result = NULL;
+               }
+       } else {
+               rcu_read_unlock();
        }
-       rcu_read_unlock();
 
        return result;
 }
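
The reshuffle above fixes a sleep-under-RCU hazard: mutex_lock() may
sleep, which is not allowed inside an rcu_read_lock() section. The
resulting pattern, restated stand-alone (illustrative, not the
in-tree code):

        rcu_read_lock();
        obj = idr_find(&idr, id);
        if (obj && !kref_get_unless_zero(&obj->refcount))
                obj = NULL;             /* raced with the final put */
        rcu_read_unlock();              /* drop RCU before any sleep */
        if (obj)
                mutex_lock(&obj->lock); /* safe: the refcount pins obj */
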
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index c0a806280257c80954930c3744d7cd65e6b6de5f..a99e0bca6812afee906a36797560b09a359ae1f5 100644
@@ -240,6 +240,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
                return RREG32_DIDT(index);
        case CGS_IND_REG_GC_CAC:
                return RREG32_GC_CAC(index);
+       case CGS_IND_REG_SE_CAC:
+               return RREG32_SE_CAC(index);
        case CGS_IND_REG__AUDIO_ENDPT:
                DRM_ERROR("audio endpt register access not implemented.\n");
                return 0;
@@ -266,6 +268,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
                return WREG32_DIDT(index, value);
        case CGS_IND_REG_GC_CAC:
                return WREG32_GC_CAC(index, value);
+       case CGS_IND_REG_SE_CAC:
+               return WREG32_SE_CAC(index, value);
        case CGS_IND_REG__AUDIO_ENDPT:
                DRM_ERROR("audio endpt register access not implemented.\n");
                return;
@@ -610,6 +614,17 @@ static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
        return 0;
 }
 
+static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
+                                       bool lock)
+{
+       CGS_FUNC_ADEV;
+
+       if (lock)
+               mutex_lock(&adev->grbm_idx_mutex);
+       else
+               mutex_unlock(&adev->grbm_idx_mutex);
+}
+
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
                                        enum cgs_ucode_id type,
                                        struct cgs_firmware_info *info)
@@ -719,7 +734,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
                                strcpy(fw_name, "amdgpu/polaris12_smc.bin");
                                break;
                        case CHIP_VEGA10:
-                               strcpy(fw_name, "amdgpu/vega10_smc.bin");
+                               if ((adev->pdev->device == 0x687f) &&
+                                       ((adev->pdev->revision == 0xc0) ||
+                                       (adev->pdev->revision == 0xc1) ||
+                                       (adev->pdev->revision == 0xc3)))
+                                       strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+                               else
+                                       strcpy(fw_name, "amdgpu/vega10_smc.bin");
                                break;
                        default:
                                DRM_ERROR("SMC firmware not supported\n");
@@ -1117,6 +1138,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
        .query_system_info = amdgpu_cgs_query_system_info,
        .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
        .enter_safe_mode = amdgpu_cgs_enter_safe_mode,
+       .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
 };
 
 static const struct cgs_os_ops amdgpu_cgs_os_ops = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d40c105b59b5ca0f3f14c2efdf40e..33789510e663f96594468e0eab258ff62d524f0b 100644
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+                                             u64 *max_bytes,
+                                             u64 *max_vis_bytes)
 {
        s64 time_us, increment_us;
-       u64 max_bytes;
        u64 free_vram, total_vram, used_vram;
 
        /* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
         */
        const s64 us_upper_bound = 200000;
 
-       if (!adev->mm_stats.log2_max_MBps)
-               return 0;
+       if (!adev->mm_stats.log2_max_MBps) {
+               *max_bytes = 0;
+               *max_vis_bytes = 0;
+               return;
+       }
 
        total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
        used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
                adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
        }
 
-       /* This returns 0 if the driver is in debt to disallow (optional)
+       /* This is set to 0 if the driver is in debt to disallow (optional)
         * buffer moves.
         */
-       max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+       *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+       /* Do the same for visible VRAM if half of it is free */
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+               u64 total_vis_vram = adev->mc.visible_vram_size;
+               u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+               if (used_vis_vram < total_vis_vram) {
+                       u64 free_vis_vram = total_vis_vram - used_vis_vram;
+                       adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+                                                         increment_us, us_upper_bound);
+
+                       if (free_vis_vram >= total_vis_vram / 2)
+                               adev->mm_stats.accum_us_vis =
+                                       max(bytes_to_us(adev, free_vis_vram / 2),
+                                           adev->mm_stats.accum_us_vis);
+               }
+
+               *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+       } else {
+               *max_vis_bytes = 0;
+       }
 
        spin_unlock(&adev->mm_stats.lock);
-       return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+                                 u64 num_vis_bytes)
 {
        spin_lock(&adev->mm_stats.lock);
        adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+       adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
        spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
                                 struct amdgpu_bo *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       u64 initial_bytes_moved;
+       u64 initial_bytes_moved, bytes_moved;
        uint32_t domain;
        int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
        /* Don't move this buffer if we have depleted our allowance
         * to move it. Don't move anything if the threshold is zero.
         */
-       if (p->bytes_moved < p->bytes_moved_threshold)
-               domain = bo->prefered_domains;
-       else
+       if (p->bytes_moved < p->bytes_moved_threshold) {
+               if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                   (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+                       /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+                        * visible VRAM if we've depleted our allowance to do
+                        * that.
+                        */
+                       if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+                               domain = bo->prefered_domains;
+                       else
+                               domain = bo->allowed_domains;
+               } else {
+                       domain = bo->prefered_domains;
+               }
+       } else {
                domain = bo->allowed_domains;
+       }
 
 retry:
        amdgpu_ttm_placement_from_domain(bo, domain);
        initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-       p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-               initial_bytes_moved;
+       bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+                     initial_bytes_moved;
+       p->bytes_moved += bytes_moved;
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+           bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+               p->bytes_moved_vis += bytes_moved;
 
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
                domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
                struct amdgpu_bo_list_entry *candidate = p->evictable;
                struct amdgpu_bo *bo = candidate->robj;
                struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-               u64 initial_bytes_moved;
+               u64 initial_bytes_moved, bytes_moved;
+               bool update_bytes_moved_vis;
                uint32_t other;
 
                /* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
                /* Good we can try to move this BO somewhere else */
                amdgpu_ttm_placement_from_domain(bo, other);
+               update_bytes_moved_vis =
+                       adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                       bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+                       bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
                initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-               p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+               bytes_moved = atomic64_read(&adev->num_bytes_moved) -
                        initial_bytes_moved;
+               p->bytes_moved += bytes_moved;
+               if (update_bytes_moved_vis)
+                       p->bytes_moved_vis += bytes_moved;
 
                if (unlikely(r))
                        break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                list_splice(&need_pages, &p->validated);
        }
 
-       p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+                                         &p->bytes_moved_vis_threshold);
        p->bytes_moved = 0;
+       p->bytes_moved_vis = 0;
        p->evictable = list_last_entry(&p->validated,
                                       struct amdgpu_bo_list_entry,
                                       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                goto error_validate;
        }
 
-       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+                                    p->bytes_moved_vis);
        fpriv->vm.last_eviction_counter =
                atomic64_read(&p->adev->num_evictions);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4a8fc15467cf8c40ed268f190a01acd4f506fed0..6279956e92a4175759280a76741de3656ef127f7 100644
@@ -53,6 +53,9 @@
 #include "bif/bif_4_1_d.h"
 #include <linux/pci.h>
 #include <linux/firmware.h>
+#include "amdgpu_vf_error.h"
+
+#include "amdgpu_amdkfd.h"
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -128,6 +131,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 {
        trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
 
+       if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
+               adev->last_mm_index = v;
+       }
+
        if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
                BUG_ON(in_interrupt());
                return amdgpu_virt_kiq_wreg(adev, reg, v);
@@ -143,6 +150,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
                writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
                spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
        }
+
+       if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
+               udelay(500);
+       }
 }
 
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
@@ -157,6 +168,9 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
+       if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
+               adev->last_mm_index = v;
+       }
 
        if ((reg * 4) < adev->rio_mem_size)
                iowrite32(v, adev->rio_mem + (reg * 4));
@@ -164,6 +178,10 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
                iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
                iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
        }
+
+       if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
+               udelay(500);
+       }
 }
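
Context for the two hunks above, inferred from the register numbers
rather than spelled out in the patch: registers 0 and 1 are the
mmMM_INDEX/mmMM_DATA indirect-access pair, so last_mm_index records
which indirect offset a following data write completes. Writes that
finish the 0x5702C sequence on Vega10+ then get a 500us settle delay.

        writel(offset, rmmio + mmMM_INDEX * 4);   /* reg 0: select  */
        writel(value,  rmmio + mmMM_DATA * 4);    /* reg 1: payload */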
 
 /**
@@ -584,6 +602,21 @@ int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb)
        }
 }
 
+int amdgpu_wb_get_256Bit(struct amdgpu_device *adev, u32 *wb)
+{
+       int i = 0;
+       unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used,
+                               adev->wb.num_wb, 0, 8, 63, 0);
+       if ((offset + 7) < adev->wb.num_wb) {
+               for (i = 0; i < 8; i++)
+                       __set_bit(offset + i, adev->wb.used);
+               *wb = offset;
+               return 0;
+       } else {
+               return -EINVAL;
+       }
+}
+
 /**
  * amdgpu_wb_free - Free a wb entry
  *
@@ -614,6 +647,23 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
        }
 }
 
+/**
+ * amdgpu_wb_free_256bit - Free a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Free a wb slot allocated for use by the driver (all asics)
+ */
+void amdgpu_wb_free_256bit(struct amdgpu_device *adev, u32 wb)
+{
+       int i = 0;
+
+       if ((wb + 7) < adev->wb.num_wb)
+               for (i = 0; i < 8; i++)
+                       __clear_bit(wb + i, adev->wb.used);
+}
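
A stand-alone model of the 8-slot ("256-bit" = eight 32-bit writeback
slots) reservation above; illustrative only, and it omits the region
alignment the in-tree bitmap_find_next_zero_area_off() call applies:

        #include <stdbool.h>

        #define NUM_WB 256
        static bool used[NUM_WB];

        static int wb_get_8(unsigned *out)
        {
                for (unsigned i = 0; i + 8 <= NUM_WB; i++) {
                        unsigned j;

                        for (j = 0; j < 8 && !used[i + j]; j++)
                                ;
                        if (j == 8) {
                                while (j--)
                                        used[i + j] = true;
                                *out = i;   /* first of the eight slots */
                                return 0;
                        }
                }
                return -1;
        }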
+
 /**
  * amdgpu_vram_location - try to find VRAM location
  * @adev: amdgpu device structure holding all necessary informations
@@ -665,7 +715,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64
 }
 
 /**
- * amdgpu_gtt_location - try to find GTT location
+ * amdgpu_gart_location - try to find GTT location
  * @adev: amdgpu device structure holding all necessary informations
  * @mc: memory controller structure holding memory informations
  *
@@ -676,28 +726,28 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64
  *
  * FIXME: when reducing GTT size align new size on power of 2.
  */
-void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
+void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
 {
        u64 size_af, size_bf;
 
-       size_af = ((adev->mc.mc_mask - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
-       size_bf = mc->vram_start & ~mc->gtt_base_align;
+       size_af = adev->mc.mc_mask - mc->vram_end;
+       size_bf = mc->vram_start;
        if (size_bf > size_af) {
-               if (mc->gtt_size > size_bf) {
+               if (mc->gart_size > size_bf) {
                        dev_warn(adev->dev, "limiting GTT\n");
-                       mc->gtt_size = size_bf;
+                       mc->gart_size = size_bf;
                }
-               mc->gtt_start = 0;
+               mc->gart_start = 0;
        } else {
-               if (mc->gtt_size > size_af) {
+               if (mc->gart_size > size_af) {
                        dev_warn(adev->dev, "limiting GTT\n");
-                       mc->gtt_size = size_af;
+                       mc->gart_size = size_af;
                }
-               mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
+               mc->gart_start = mc->vram_end + 1;
        }
-       mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
+       mc->gart_end = mc->gart_start + mc->gart_size - 1;
        dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
-                       mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
+                       mc->gart_size >> 20, mc->gart_start, mc->gart_end);
 }
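
A worked placement example for amdgpu_gart_location() with hypothetical
numbers: given a 40-bit mc_mask (0xFF_FFFF_FFFF) and 8 GiB of VRAM at
offset 0, size_bf = vram_start = 0 and size_af = mc_mask - vram_end =
0xFE_0000_0000, so the GART lands immediately after VRAM at gart_start
= 0x2_0000_0000. The gtt_base_align rounding is gone because the GART
aperture no longer shares its sizing with the TTM GTT pool.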
 
 /*
@@ -720,7 +770,12 @@ bool amdgpu_need_post(struct amdgpu_device *adev)
                adev->has_hw_reset = false;
                return true;
        }
-       /* then check MEM_SIZE, in case the crtcs are off */
+
+       /* bios scratch used on CIK+ */
+       if (adev->asic_type >= CHIP_BONAIRE)
+               return amdgpu_atombios_scratch_need_asic_init(adev);
+
+       /* check MEM_SIZE for older asics */
        reg = amdgpu_asic_get_config_memsize(adev);
 
        if ((reg != 0) && (reg != 0xffffffff))
@@ -1031,19 +1086,6 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state)
                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
-/**
- * amdgpu_check_pot_argument - check that argument is a power of two
- *
- * @arg: value to check
- *
- * Validates that a certain argument is a power of two (all asics).
- * Returns true if argument is valid.
- */
-static bool amdgpu_check_pot_argument(int arg)
-{
-       return (arg & (arg - 1)) == 0;
-}
-
 static void amdgpu_check_block_size(struct amdgpu_device *adev)
 {
        /* defines number of bits in page table versus page directory,
@@ -1077,7 +1119,7 @@ static void amdgpu_check_vm_size(struct amdgpu_device *adev)
        if (amdgpu_vm_size == -1)
                return;
 
-       if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
+       if (!is_power_of_2(amdgpu_vm_size)) {
                dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
                         amdgpu_vm_size);
                goto def_value;
@@ -1118,19 +1160,24 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = 4;
-       } else if (!amdgpu_check_pot_argument(amdgpu_sched_jobs)){
+       } else if (!is_power_of_2(amdgpu_sched_jobs)){
                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
        }
 
-       if (amdgpu_gart_size != -1) {
+       if (amdgpu_gart_size < 32) {
+               /* gart size must be greater or equal to 32M */
+               dev_warn(adev->dev, "gart size (%d) too small\n",
+                        amdgpu_gart_size);
+               amdgpu_gart_size = 32;
+       }
+
+       if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
                /* gtt size must be greater or equal to 32M */
-               if (amdgpu_gart_size < 32) {
-                       dev_warn(adev->dev, "gart size (%d) too small\n",
-                                amdgpu_gart_size);
-                       amdgpu_gart_size = -1;
-               }
+               dev_warn(adev->dev, "gtt size (%d) too small\n",
+                                amdgpu_gtt_size);
+               amdgpu_gtt_size = -1;
        }
 
        amdgpu_check_vm_size(adev);
@@ -1138,7 +1185,7 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
        amdgpu_check_block_size(adev);
 
        if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
-           !amdgpu_check_pot_argument(amdgpu_vram_page_split))) {
+           !is_power_of_2(amdgpu_vram_page_split))) {
                dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
                         amdgpu_vram_page_split);
                amdgpu_vram_page_split = 1024;
@@ -2019,7 +2066,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        adev->flags = flags;
        adev->asic_type = flags & AMD_ASIC_MASK;
        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
-       adev->mc.gtt_size = 512 * 1024 * 1024;
+       adev->mc.gart_size = 512 * 1024 * 1024;
        adev->accel_working = false;
        adev->num_rings = 0;
        adev->mman.buffer_funcs = NULL;
@@ -2068,6 +2115,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        spin_lock_init(&adev->uvd_ctx_idx_lock);
        spin_lock_init(&adev->didt_idx_lock);
        spin_lock_init(&adev->gc_cac_idx_lock);
+       spin_lock_init(&adev->se_cac_idx_lock);
        spin_lock_init(&adev->audio_endpt_idx_lock);
        spin_lock_init(&adev->mm_stats.lock);
 
@@ -2143,6 +2191,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_atombios_init(adev);
        if (r) {
                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
                goto failed;
        }
 
@@ -2153,6 +2202,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        if (amdgpu_vpost_needed(adev)) {
                if (!adev->bios) {
                        dev_err(adev->dev, "no vBIOS found\n");
+                       amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
                        r = -EINVAL;
                        goto failed;
                }
@@ -2160,18 +2210,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
                if (r) {
                        dev_err(adev->dev, "gpu post error!\n");
+                       amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0);
                        goto failed;
                }
        } else {
                DRM_INFO("GPU post is not needed\n");
        }
 
-       if (!adev->is_atom_fw) {
+       if (adev->is_atom_fw) {
+               /* Initialize clocks */
+               r = amdgpu_atomfirmware_get_clock_info(adev);
+               if (r) {
+                       dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+                       amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+                       goto failed;
+               }
+       } else {
                /* Initialize clocks */
                r = amdgpu_atombios_get_clock_info(adev);
                if (r) {
                        dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-                       return r;
+                       amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+                       goto failed;
                }
                /* init i2c buses */
                amdgpu_atombios_i2c_init(adev);
@@ -2181,6 +2241,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_fence_driver_init(adev);
        if (r) {
                dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
                goto failed;
        }
 
@@ -2190,6 +2251,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_init(adev);
        if (r) {
                dev_err(adev->dev, "amdgpu_init failed\n");
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
                amdgpu_fini(adev);
                goto failed;
        }
@@ -2209,6 +2271,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_ib_pool_init(adev);
        if (r) {
                dev_err(adev->dev, "IB initialization failed (%d).\n", r);
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
                goto failed;
        }
 
@@ -2253,12 +2316,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_late_init(adev);
        if (r) {
                dev_err(adev->dev, "amdgpu_late_init failed\n");
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
                goto failed;
        }
 
        return 0;
 
 failed:
+       amdgpu_vf_error_trans_all(adev);
        if (runtime)
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
        return r;
@@ -2351,6 +2416,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
        }
        drm_modeset_unlock_all(dev);
 
+       amdgpu_amdkfd_suspend(adev);
+
        /* unpin the front buffers and cursors */
        list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
@@ -2392,10 +2459,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
         */
        amdgpu_bo_evict_vram(adev);
 
-       if (adev->is_atom_fw)
-               amdgpu_atomfirmware_scratch_regs_save(adev);
-       else
-               amdgpu_atombios_scratch_regs_save(adev);
+       amdgpu_atombios_scratch_regs_save(adev);
        pci_save_state(dev->pdev);
        if (suspend) {
                /* Shut down the device */
@@ -2444,10 +2508,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                if (r)
                        goto unlock;
        }
-       if (adev->is_atom_fw)
-               amdgpu_atomfirmware_scratch_regs_restore(adev);
-       else
-               amdgpu_atombios_scratch_regs_restore(adev);
+       amdgpu_atombios_scratch_regs_restore(adev);
 
        /* post card */
        if (amdgpu_need_post(adev)) {
@@ -2490,6 +2551,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                        }
                }
        }
+       r = amdgpu_amdkfd_resume(adev);
+       if (r)
+               return r;
 
        /* blat the mode back in */
        if (fbcon) {
@@ -2860,21 +2924,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
                r = amdgpu_suspend(adev);
 
 retry:
-               /* Disable fb access */
-               if (adev->mode_info.num_crtc) {
-                       struct amdgpu_mode_mc_save save;
-                       amdgpu_display_stop_mc_access(adev, &save);
-                       amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
-               }
-               if (adev->is_atom_fw)
-                       amdgpu_atomfirmware_scratch_regs_save(adev);
-               else
-                       amdgpu_atombios_scratch_regs_save(adev);
+               amdgpu_atombios_scratch_regs_save(adev);
                r = amdgpu_asic_reset(adev);
-               if (adev->is_atom_fw)
-                       amdgpu_atomfirmware_scratch_regs_restore(adev);
-               else
-                       amdgpu_atombios_scratch_regs_restore(adev);
+               amdgpu_atombios_scratch_regs_restore(adev);
                /* post card */
                amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
@@ -2952,6 +3004,7 @@ out:
                }
        } else {
                dev_err(adev->dev, "asic resume failed (%d).\n", r);
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r);
                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                        if (adev->rings[i] && adev->rings[i]->sched.thread) {
                                kthread_unpark(adev->rings[i]->sched.thread);
@@ -2962,12 +3015,16 @@ out:
        drm_helper_resume_force_mode(adev->ddev);
 
        ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
-       if (r)
+       if (r) {
                /* bad news, how do we tell userspace? */
                dev_info(adev->dev, "GPU reset failed\n");
-       else
+               amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+       } else {
                dev_info(adev->dev, "GPU reset succeeded!\n");
+       }
 
+       amdgpu_vf_error_trans_all(adev);
        return r;
 }
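
For context: the amdgpu_vf_error_put()/amdgpu_vf_error_trans_all() pairs added throughout this series follow a collect-then-flush pattern: errors hit while running as an SR-IOV virtual function are buffered locally, then forwarded to the host (GIM) in one batch once the failing operation unwinds. A minimal sketch of that pattern; the entry layout, buffer size, and names below are illustrative assumptions, not the actual amdgpu_vf_error.c internals:

/* Sketch only: a fixed-size buffer of pending VF errors, drained in
 * one batch.  Names and layout here are illustrative assumptions. */
#include <stdint.h>
#include <stdio.h>

#define VF_ERR_MAX 8

struct vf_err_entry {
	uint16_t code;	/* an AMDGIM_ERROR_VF_* style value */
	uint16_t flags;	/* the callers above pass 0 */
	uint64_t data;	/* extra payload, e.g. the errno */
};

static struct vf_err_entry vf_err_buf[VF_ERR_MAX];
static unsigned int vf_err_count;

static void vf_error_put_sketch(uint16_t code, uint16_t flags, uint64_t data)
{
	if (vf_err_count < VF_ERR_MAX)
		vf_err_buf[vf_err_count++] =
			(struct vf_err_entry){ code, flags, data };
}

static void vf_error_trans_all_sketch(void)
{
	/* The driver would hand the buffered entries to the host via the
	 * virt mailbox; the sketch just prints and drains them. */
	for (unsigned int i = 0; i < vf_err_count; i++)
		printf("vf error %u: code %u data %llu\n", i,
		       (unsigned)vf_err_buf[i].code,
		       (unsigned long long)vf_err_buf[i].data);
	vf_err_count = 0;
}

int main(void)
{
	vf_error_put_sketch(1 /* e.g. a reset failure code */, 0, 22);
	vf_error_trans_all_sketch();
	return 0;
}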
 
index 46999247095390e382bbc23055b2ae618598a8fb..aa53a860c9049f4b83beaf554125aec89f5ff97a 100644 (file)
@@ -74,7 +74,9 @@
 #define KMS_DRIVER_PATCHLEVEL  0
 
 int amdgpu_vram_limit = 0;
-int amdgpu_gart_size = -1; /* auto */
+int amdgpu_vis_vram_limit = 0;
+unsigned amdgpu_gart_size = 256;
+int amdgpu_gtt_size = -1; /* auto */
 int amdgpu_moverate = -1; /* auto */
 int amdgpu_benchmarking = 0;
 int amdgpu_testing = 0;
@@ -106,6 +108,7 @@ unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
 unsigned amdgpu_cg_mask = 0xffffffff;
 unsigned amdgpu_pg_mask = 0xffffffff;
+unsigned amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 unsigned amdgpu_pp_feature_mask = 0xffffffff;
@@ -120,8 +123,14 @@ int amdgpu_lbpw = -1;
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
 
-MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)");
-module_param_named(gartsize, amdgpu_gart_size, int, 0600);
+MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
+module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
+
+MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)");
+module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
+
+MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
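
(For example, booting with amdgpu.gttsize=4096 on the kernel command line, or loading with "modprobe amdgpu gttsize=4096", caps the TTM GTT domain at 4 GiB while the internal gart window stays at the 256 MB gartsize default.)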
 
 MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
 module_param_named(moverate, amdgpu_moverate, int, 0600);
@@ -186,7 +195,7 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)");
 module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
 
-MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
+MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
 module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
 
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
@@ -199,7 +208,7 @@ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default
 module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 
 MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default)");
-module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444);
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
 
 MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))");
 module_param_named(no_evict, amdgpu_no_evict, int, 0444);
@@ -219,6 +228,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
 MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
 module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
 
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
 MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
 module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
 
index a57abc1a25fb5fbe6da96410316cc85342363b44..5cc4987cd8873a0a531c3822a27b808aeac8b926 100644 (file)
 /*
  * Common GART table functions.
  */
+
+/**
+ * amdgpu_gart_set_defaults - set the default gart_size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set the default gart_size from the gartsize module parameter.
+ */
+void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
+{
+       adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
+}
+
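
The shift above converts the gartsize module parameter from megabytes to bytes: with the default of 256, mc.gart_size becomes (uint64_t)256 << 20 = 268435456 bytes (0x10000000).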
 /**
  * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
  *
@@ -262,6 +275,41 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        return 0;
 }
 
+/**
+ * amdgpu_gart_map - map dma_addresses into GART entries
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @dma_addr: DMA addresses of pages
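+ * @flags: page table entry flags to apply to each page
+ * @dst: CPU address of the page table to write the entries into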
+ *
+ * Map the DMA addresses into GART entries (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
+int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+                   int pages, dma_addr_t *dma_addr, uint64_t flags,
+                   void *dst)
+{
+       uint64_t page_base;
+       unsigned i, j, t;
+
+       if (!adev->gart.ready) {
+               WARN(1, "trying to bind memory to uninitialized GART!\n");
+               return -EINVAL;
+       }
+
+       t = offset / AMDGPU_GPU_PAGE_SIZE;
+
+       for (i = 0; i < pages; i++) {
+               page_base = dma_addr[i];
+               for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+                       amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags);
+                       page_base += AMDGPU_GPU_PAGE_SIZE;
+               }
+       }
+       return 0;
+}
+
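
The nested loops above expand each CPU page into PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE consecutive 4 KiB GART entries. A standalone sketch of that expansion, assuming a hypothetical 16 KiB CPU page size so the ratio is 4:

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SIZE 4096u	/* AMDGPU_GPU_PAGE_SIZE */
#define CPU_PAGE_SIZE 16384u	/* hypothetical PAGE_SIZE, for illustration */

int main(void)
{
	uint64_t dma_addr[2] = { 0x100000, 0x200000 };	/* made-up DMA addresses */
	unsigned int t = 0;	/* first GART entry, i.e. offset / GPU_PAGE_SIZE */

	for (unsigned int i = 0; i < 2; i++) {
		uint64_t page_base = dma_addr[i];

		for (unsigned int j = 0; j < CPU_PAGE_SIZE / GPU_PAGE_SIZE; j++, t++) {
			/* stands in for amdgpu_gart_set_pte_pde() */
			printf("GART entry %u -> 0x%llx\n", t,
			       (unsigned long long)page_base);
			page_base += GPU_PAGE_SIZE;
		}
	}
	return 0;
}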
 /**
  * amdgpu_gart_bind - bind pages into the gart page table
  *
@@ -279,31 +327,30 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist, dma_addr_t *dma_addr,
                     uint64_t flags)
 {
-       unsigned t;
-       unsigned p;
-       uint64_t page_base;
-       int i, j;
+#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+       unsigned i, t, p;
+#endif
+       int r;
 
        if (!adev->gart.ready) {
                WARN(1, "trying to bind memory to uninitialized GART!\n");
                return -EINVAL;
        }
 
+#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
        t = offset / AMDGPU_GPU_PAGE_SIZE;
        p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
-
-       for (i = 0; i < pages; i++, p++) {
-#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+       for (i = 0; i < pages; i++, p++)
                adev->gart.pages[p] = pagelist[i];
 #endif
-               if (adev->gart.ptr) {
-                       page_base = dma_addr[i];
-                       for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
-                               amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags);
-                               page_base += AMDGPU_GPU_PAGE_SIZE;
-                       }
-               }
+
+       if (adev->gart.ptr) {
+               r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags,
+                           adev->gart.ptr);
+               if (r)
+                       return r;
        }
+
        mb();
        amdgpu_gart_flush_gpu_tlb(adev, 0);
        return 0;
@@ -333,8 +380,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
        if (r)
                return r;
        /* Compute table size */
-       adev->gart.num_cpu_pages = adev->mc.gtt_size / PAGE_SIZE;
-       adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE;
+       adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE;
+       adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE;
        DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
                 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
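
With the 256 MB default gart_size and 4 KiB pages on both the CPU and GPU side, both counts work out to 268435456 / 4096 = 65536 pages.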
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
new file mode 100644 (file)
index 0000000..d4cce69
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GART_H__
+#define __AMDGPU_GART_H__
+
+#include <linux/types.h>
+
+/*
+ * GART structures, functions & helpers
+ */
+struct amdgpu_device;
+struct amdgpu_bo;
+struct amdgpu_gart_funcs;
+
+#define AMDGPU_GPU_PAGE_SIZE 4096
+#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
+#define AMDGPU_GPU_PAGE_SHIFT 12
+#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
+
+struct amdgpu_gart {
+       dma_addr_t                      table_addr;
+       struct amdgpu_bo                *robj;
+       void                            *ptr;
+       unsigned                        num_gpu_pages;
+       unsigned                        num_cpu_pages;
+       unsigned                        table_size;
+#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
+       struct page                     **pages;
+#endif
+       bool                            ready;
+
+       /* Asic default pte flags */
+       uint64_t                        gart_pte_flags;
+
+       const struct amdgpu_gart_funcs *gart_funcs;
+};
+
+void amdgpu_gart_set_defaults(struct amdgpu_device *adev);
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
+int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
+void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
+int amdgpu_gart_init(struct amdgpu_device *adev);
+void amdgpu_gart_fini(struct amdgpu_device *adev);
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+                      int pages);
+int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+                   int pages, dma_addr_t *dma_addr, uint64_t flags,
+                   void *dst);
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+                    int pages, struct page **pagelist,
+                    dma_addr_t *dma_addr, uint64_t flags);
+
+#endif
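
As a quick sanity check of the alignment macro above, a standalone snippet using the same definitions:

#include <assert.h>

#define AMDGPU_GPU_PAGE_SIZE 4096
#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)

int main(void)
{
	assert(AMDGPU_GPU_PAGE_ALIGN(1) == 4096);	/* rounds up */
	assert(AMDGPU_GPU_PAGE_ALIGN(4096) == 4096);	/* already aligned */
	assert(AMDGPU_GPU_PAGE_ALIGN(4097) == 8192);	/* next page */
	return 0;
}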
index 621f739103a6a30c9cf38801343ff5397a928d63..917ac5e074a00e0ac4c5b408676298def1e4d06d 100644 (file)
@@ -49,7 +49,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
                                struct drm_gem_object **obj)
 {
        struct amdgpu_bo *robj;
-       unsigned long max_size;
        int r;
 
        *obj = NULL;
@@ -58,17 +57,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
                alignment = PAGE_SIZE;
        }
 
-       if (!(initial_domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA))) {
-               /* Maximum bo size is the unpinned gtt size since we use the gtt to
-                * handle vram to system pool migrations.
-                */
-               max_size = adev->mc.gtt_size - adev->gart_pin_size;
-               if (size > max_size) {
-                       DRM_DEBUG("Allocation size %ldMb bigger than %ldMb limit\n",
-                                 size >> 20, max_size >> 20);
-                       return -ENOMEM;
-               }
-       }
 retry:
        r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
                             flags, NULL, NULL, &robj);
@@ -784,6 +772,7 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
        unsigned domain;
        const char *placement;
        unsigned pin_count;
+       uint64_t offset;
 
        domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
        switch (domain) {
@@ -798,9 +787,12 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
                placement = " CPU";
                break;
        }
-       seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
-                  id, amdgpu_bo_size(bo), placement,
-                  amdgpu_bo_gpu_offset(bo));
+       seq_printf(m, "\t0x%08x: %12ld byte %s",
+                  id, amdgpu_bo_size(bo), placement);
+
+       offset = ACCESS_ONCE(bo->tbo.mem.start);
+       if (offset != AMDGPU_BO_INVALID_OFFSET)
+               seq_printf(m, " @ 0x%010Lx", offset);
 
        pin_count = ACCESS_ONCE(bo->pin_count);
        if (pin_count)
index e26108aad3fe246bfd12d9e1417c91a353cef5e3..4f6c68fc1dd91a43813a2782bbc9cf3dbbf43ded 100644 (file)
@@ -125,7 +125,8 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
                if (mec >= adev->gfx.mec.num_mec)
                        break;
 
-               if (adev->gfx.mec.num_mec > 1) {
+               /* FIXME: spreading the queues across pipes causes perf regressions */
+               if (0) {
                        /* policy: amdgpu owns the first two queues of the first MEC */
                        if (mec == 0 && queue < 2)
                                set_bit(i, adev->gfx.mec.queue_bitmap);
index f7d22c44034d43cce77ecd096c953e18aadc955d..5e6b90c6794f47317f25ab4e4f1142bacd58a06d 100644 (file)
@@ -42,13 +42,17 @@ struct amdgpu_gtt_mgr {
 static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
                               unsigned long p_size)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
        struct amdgpu_gtt_mgr *mgr;
+       uint64_t start, size;
 
        mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
        if (!mgr)
                return -ENOMEM;
 
-       drm_mm_init(&mgr->mm, 0, p_size);
+       start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+       size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
+       drm_mm_init(&mgr->mm, start, size);
        spin_lock_init(&mgr->lock);
        mgr->available = p_size;
        man->priv = mgr;
@@ -80,6 +84,20 @@ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
        return 0;
 }
 
+/**
+ * amdgpu_gtt_mgr_is_allocated - Check if mem has address space
+ *
+ * @mem: the mem object to check
+ *
+ * Check if the mem object already has address space allocated.
+ */
+bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem)
+{
+       struct drm_mm_node *node = mem->mm_node;
+
+       return (node->start != AMDGPU_BO_INVALID_OFFSET);
+}
+
 /**
  * amdgpu_gtt_mgr_alloc - allocate new ranges
  *
@@ -95,13 +113,14 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
                         const struct ttm_place *place,
                         struct ttm_mem_reg *mem)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
        struct amdgpu_gtt_mgr *mgr = man->priv;
        struct drm_mm_node *node = mem->mm_node;
        enum drm_mm_insert_mode mode;
        unsigned long fpfn, lpfn;
        int r;
 
-       if (node->start != AMDGPU_BO_INVALID_OFFSET)
+       if (amdgpu_gtt_mgr_is_allocated(mem))
                return 0;
 
        if (place)
@@ -112,7 +131,7 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
        if (place && place->lpfn)
                lpfn = place->lpfn;
        else
-               lpfn = man->size;
+               lpfn = adev->gart.num_cpu_pages;
 
        mode = DRM_MM_INSERT_BEST;
        if (place && place->flags & TTM_PL_FLAG_TOPDOWN)
index f774b3f497d28735829d9b6407cd1832e9174945..659997bfff303b789f9f5fa6ae8ec17b0a02ae5c 100644 (file)
@@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
        unsigned i;
        int r = 0;
+       bool need_pipe_sync = false;
 
        if (num_ibs == 0)
                return -EINVAL;
@@ -165,15 +166,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        if (ring->funcs->emit_pipeline_sync && job &&
            ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
             amdgpu_vm_need_pipeline_sync(ring, job))) {
-               amdgpu_ring_emit_pipeline_sync(ring);
+               need_pipe_sync = true;
                dma_fence_put(tmp);
        }
 
        if (ring->funcs->insert_start)
                ring->funcs->insert_start(ring);
 
-       if (vm) {
-               r = amdgpu_vm_flush(ring, job);
+       if (job) {
+               r = amdgpu_vm_flush(ring, job, need_pipe_sync);
                if (r) {
                        amdgpu_ring_undo(ring);
                        return r;
index 2480273c1dcacc0e60ffab5ee42be3daac0cc409..4bdd851f56d081310614f27dce5093255ecf7dfd 100644 (file)
@@ -220,6 +220,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
        int r = 0;
 
        spin_lock_init(&adev->irq.lock);
+
+       /* Disable vblank irqs aggressively for power-saving */
+       adev->ddev->vblank_disable_immediate = true;
+
        r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc);
        if (r) {
                return r;
index 3d641e10e6b65c728ddda00be6027d7fe42cc461..4510627ae83e9b57e19dccfe19f260da00f918f2 100644 (file)
@@ -81,6 +81,8 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
        r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
        if (r)
                kfree(*job);
+       else
+               (*job)->vm_pd_addr = adev->gart.table_addr;
 
        return r;
 }
index b0b23101d1c870ddeefc89b15818cd5b13ea13a7..09f833255ba1dcd4d9212d2966d903d9c381e5c9 100644 (file)
@@ -485,7 +485,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                vram_gtt.vram_size -= adev->vram_pin_size;
                vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
                vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
-               vram_gtt.gtt_size  = adev->mc.gtt_size;
+               vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
+               vram_gtt.gtt_size *= PAGE_SIZE;
                vram_gtt.gtt_size -= adev->gart_pin_size;
                return copy_to_user(out, &vram_gtt,
                                    min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
@@ -510,9 +511,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                mem.cpu_accessible_vram.max_allocation =
                        mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
 
-               mem.gtt.total_heap_size = adev->mc.gtt_size;
-               mem.gtt.usable_heap_size =
-                       adev->mc.gtt_size - adev->gart_pin_size;
+               mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size;
+               mem.gtt.total_heap_size *= PAGE_SIZE;
+               mem.gtt.usable_heap_size = mem.gtt.total_heap_size
+                       - adev->gart_pin_size;
                mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
                mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
 
@@ -571,8 +573,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                        dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
                        dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
                } else {
-                       dev_info.max_engine_clock = adev->pm.default_sclk * 10;
-                       dev_info.max_memory_clock = adev->pm.default_mclk * 10;
+                       dev_info.max_engine_clock = adev->clock.default_sclk * 10;
+                       dev_info.max_memory_clock = adev->clock.default_mclk * 10;
                }
                dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
                dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
@@ -587,8 +589,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
                dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
                dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
-               dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) *
-                                            AMDGPU_GPU_PAGE_SIZE;
+               dev_info.pte_fragment_size =
+                       (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
+                       AMDGPU_GPU_PAGE_SIZE;
                dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
 
                dev_info.cu_active_number = adev->gfx.cu_info.number;
index 43a9d3aec6c426e20bbff9bbb30084fe3cc523b2..b8abd4e18d51697cfa36cafd499b42aabd9ed85a 100644 (file)
@@ -257,15 +257,7 @@ struct amdgpu_audio {
        int num_pins;
 };
 
-struct amdgpu_mode_mc_save {
-       u32 vga_render_control;
-       u32 vga_hdp_control;
-       bool crtc_enabled[AMDGPU_MAX_CRTCS];
-};
-
 struct amdgpu_display_funcs {
-       /* vga render */
-       void (*set_vga_render_state)(struct amdgpu_device *adev, bool render);
        /* display watermarks */
        void (*bandwidth_update)(struct amdgpu_device *adev);
        /* get frame count */
@@ -300,10 +292,6 @@ struct amdgpu_display_funcs {
                              uint16_t connector_object_id,
                              struct amdgpu_hpd *hpd,
                              struct amdgpu_router *router);
-       void (*stop_mc_access)(struct amdgpu_device *adev,
-                              struct amdgpu_mode_mc_save *save);
-       void (*resume_mc_access)(struct amdgpu_device *adev,
-                                struct amdgpu_mode_mc_save *save);
 };
 
 struct amdgpu_mode_info {
index 8ee69652be8ceea145129cefb46ab46920744a74..3ec43cf9ad78fc17420dcf8c4be86173e20d9ff3 100644 (file)
@@ -93,6 +93,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
        bo = container_of(tbo, struct amdgpu_bo, tbo);
 
+       amdgpu_bo_kunmap(bo);
        amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
 
        drm_gem_object_release(&bo->gem_base);
@@ -322,7 +323,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
        struct amdgpu_bo *bo;
        enum ttm_bo_type type;
        unsigned long page_align;
-       u64 initial_bytes_moved;
+       u64 initial_bytes_moved, bytes_moved;
        size_t acc_size;
        int r;
 
@@ -398,8 +399,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
        r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
                                 &bo->placement, page_align, !kernel, NULL,
                                 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-       amdgpu_cs_report_moved_bytes(adev,
-               atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+       bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+                     initial_bytes_moved;
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+           bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+               amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+       else
+               amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
 
        if (unlikely(r != 0))
                return r;
@@ -426,6 +433,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 
        trace_amdgpu_bo_create(bo);
 
+       /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
+       if (type == ttm_bo_type_device)
+               bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
        return 0;
 
 fail_unreserve:
@@ -535,7 +546,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 
        r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
                               amdgpu_bo_size(bo), resv, fence,
-                              direct);
+                              direct, false);
        if (!r)
                amdgpu_bo_fence(bo, *fence, true);
 
@@ -588,7 +599,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 
        r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
                               amdgpu_bo_size(bo), resv, fence,
-                              direct);
+                              direct, false);
        if (!r)
                amdgpu_bo_fence(bo, *fence, true);
 
@@ -724,15 +735,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                dev_err(adev->dev, "%p pin failed\n", bo);
                goto error;
        }
-       r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
-       if (unlikely(r)) {
-               dev_err(adev->dev, "%p bind failed\n", bo);
-               goto error;
-       }
 
        bo->pin_count = 1;
-       if (gpu_addr != NULL)
+       if (gpu_addr != NULL) {
+               r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
+               if (unlikely(r)) {
+                       dev_err(adev->dev, "%p bind failed\n", bo);
+                       goto error;
+               }
                *gpu_addr = amdgpu_bo_gpu_offset(bo);
+       }
        if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
                adev->vram_pin_size += amdgpu_bo_size(bo);
                if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
@@ -921,6 +933,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
        abo = container_of(bo, struct amdgpu_bo, tbo);
        amdgpu_vm_bo_invalidate(adev, abo);
 
+       amdgpu_bo_kunmap(abo);
+
        /* remember the eviction */
        if (evict)
                atomic64_inc(&adev->num_evictions);
@@ -939,19 +953,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
        struct amdgpu_bo *abo;
-       unsigned long offset, size, lpfn;
-       int i, r;
+       unsigned long offset, size;
+       int r;
 
        if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
                return 0;
 
        abo = container_of(bo, struct amdgpu_bo, tbo);
+
+       /* Remember that this BO was accessed by the CPU */
+       abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
        if (bo->mem.mem_type != TTM_PL_VRAM)
                return 0;
 
        size = bo->mem.num_pages << PAGE_SHIFT;
        offset = bo->mem.start << PAGE_SHIFT;
-       /* TODO: figure out how to map scattered VRAM to the CPU */
        if ((offset + size) <= adev->mc.visible_vram_size)
                return 0;
 
@@ -961,26 +978,21 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
        /* hurrah the memory is not visible ! */
        atomic64_inc(&adev->num_vram_cpu_page_faults);
-       amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
-       lpfn =  adev->mc.visible_vram_size >> PAGE_SHIFT;
-       for (i = 0; i < abo->placement.num_placement; i++) {
-               /* Force into visible VRAM */
-               if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
-                   (!abo->placements[i].lpfn ||
-                    abo->placements[i].lpfn > lpfn))
-                       abo->placements[i].lpfn = lpfn;
-       }
+       amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+                                        AMDGPU_GEM_DOMAIN_GTT);
+
+       /* Avoid costly evictions; only set GTT as a busy placement */
+       abo->placement.num_busy_placement = 1;
+       abo->placement.busy_placement = &abo->placements[1];
+
        r = ttm_bo_validate(bo, &abo->placement, false, false);
-       if (unlikely(r == -ENOMEM)) {
-               amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
-               return ttm_bo_validate(bo, &abo->placement, false, false);
-       } else if (unlikely(r != 0)) {
+       if (unlikely(r != 0))
                return r;
-       }
 
        offset = bo->mem.start << PAGE_SHIFT;
        /* this should never happen */
-       if ((offset + size) > adev->mc.visible_vram_size)
+       if (bo->mem.mem_type == TTM_PL_VRAM &&
+           (offset + size) > adev->mc.visible_vram_size)
                return -EINVAL;
 
        return 0;
index 382485115b0641b059602d21fa92be33281ac8de..833b172a2c2a8b6cae6ca3b6dba3054ba35ba15b 100644 (file)
@@ -120,7 +120,11 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
  */
 static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
 {
-       return bo->tbo.mem.mem_type != TTM_PL_SYSTEM;
+       switch (bo->tbo.mem.mem_type) {
+       case TTM_PL_TT: return amdgpu_ttm_is_bound(bo->tbo.ttm);
+       case TTM_PL_VRAM: return true;
+       default: return false;
+       }
 }
 
 int amdgpu_bo_create(struct amdgpu_device *adev,
index 4083be61b328fb56f1832fa95496074e8c84cef7..8c2204c7b3847c3ce18042b48d70516155b843e5 100644 (file)
@@ -63,8 +63,13 @@ static int psp_sw_init(void *handle)
                psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
                break;
        case CHIP_RAVEN:
+#if 0
+               psp->init_microcode = psp_v10_0_init_microcode;
+#endif
                psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
                psp->ring_init = psp_v10_0_ring_init;
+               psp->ring_create = psp_v10_0_ring_create;
+               psp->ring_destroy = psp_v10_0_ring_destroy;
                psp->cmd_submit = psp_v10_0_cmd_submit;
                psp->compare_sram_data = psp_v10_0_compare_sram_data;
                break;
@@ -95,9 +100,8 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
        int i;
        struct amdgpu_device *adev = psp->adev;
 
-       val = RREG32(reg_index);
-
        for (i = 0; i < adev->usec_timeout; i++) {
+               val = RREG32(reg_index);
                if (check_changed) {
                        if (val != reg_val)
                                return 0;
@@ -118,33 +122,18 @@ psp_cmd_submit_buf(struct psp_context *psp,
                   int index)
 {
        int ret;
-       struct amdgpu_bo *cmd_buf_bo;
-       uint64_t cmd_buf_mc_addr;
-       struct psp_gfx_cmd_resp *cmd_buf_mem;
-       struct amdgpu_device *adev = psp->adev;
-
-       ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
-                                     &cmd_buf_bo, &cmd_buf_mc_addr,
-                                     (void **)&cmd_buf_mem);
-       if (ret)
-               return ret;
 
-       memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
+       memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
 
-       memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
+       memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
 
-       ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr,
+       ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
                             fence_mc_addr, index);
 
        while (*((unsigned int *)psp->fence_buf) != index) {
                msleep(1);
        }
 
-       amdgpu_bo_free_kernel(&cmd_buf_bo,
-                             &cmd_buf_mc_addr,
-                             (void **)&cmd_buf_mem);
-
        return ret;
 }
 
@@ -351,6 +340,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
                                      &psp->fence_buf_bo,
                                      &psp->fence_buf_mc_addr,
                                      &psp->fence_buf);
+       if (ret)
+               goto failed_mem2;
+
+       ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+                                     (void **)&psp->cmd_buf_mem);
        if (ret)
                goto failed_mem1;
 
@@ -358,7 +354,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
 
        ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
        if (ret)
-               goto failed_mem1;
+               goto failed_mem;
 
        ret = psp_tmr_init(psp);
        if (ret)
@@ -379,9 +375,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
        return 0;
 
 failed_mem:
+       amdgpu_bo_free_kernel(&psp->cmd_buf_bo,
+                             &psp->cmd_buf_mc_addr,
+                             (void **)&psp->cmd_buf_mem);
+failed_mem1:
        amdgpu_bo_free_kernel(&psp->fence_buf_bo,
                              &psp->fence_buf_mc_addr, &psp->fence_buf);
-failed_mem1:
+failed_mem2:
        amdgpu_bo_free_kernel(&psp->fw_pri_bo,
                              &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 failed:
@@ -435,16 +435,15 @@ static int psp_hw_fini(void *handle)
 
        psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
-       if (psp->tmr_buf)
-               amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
-
-       if (psp->fw_pri_buf)
-               amdgpu_bo_free_kernel(&psp->fw_pri_bo,
-                                     &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
-
-       if (psp->fence_buf_bo)
-               amdgpu_bo_free_kernel(&psp->fence_buf_bo,
-                                     &psp->fence_buf_mc_addr, &psp->fence_buf);
+       amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
+       amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+                             &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+       amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+                             &psp->fence_buf_mc_addr, &psp->fence_buf);
+       amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr,
+                             &psp->asd_shared_buf);
+       amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+                             (void **)&psp->cmd_buf_mem);
 
        kfree(psp->cmd);
        psp->cmd = NULL;
index 1a1c8b469f9385758c188c825597cf67b6121c2d..538fa9dbfb21200094df54f07ad598f77f323fc6 100644 (file)
@@ -108,6 +108,11 @@ struct psp_context
        struct amdgpu_bo                *fence_buf_bo;
        uint64_t                        fence_buf_mc_addr;
        void                            *fence_buf;
+
+       /* cmd buffer */
+       struct amdgpu_bo                *cmd_buf_bo;
+       uint64_t                        cmd_buf_mc_addr;
+       struct psp_gfx_cmd_resp         *cmd_buf_mem;
 };
 
 struct amdgpu_psp_funcs {
index 75165e07b1cd8807831d4dd602a9c1a2037d8ad6..15b7149d120440eade9c53caedc799b93a5c275f 100644 (file)
@@ -212,10 +212,19 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 
        }
 
-       r = amdgpu_wb_get(adev, &ring->fence_offs);
-       if (r) {
-               dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
-               return r;
+       if (amdgpu_sriov_vf(adev) && ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+               r = amdgpu_wb_get_256Bit(adev, &ring->fence_offs);
+               if (r) {
+                       dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+                       return r;
+               }
+
+       } else {
+               r = amdgpu_wb_get(adev, &ring->fence_offs);
+               if (r) {
+                       dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+                       return r;
+               }
        }
 
        r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
@@ -278,17 +287,18 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
        ring->ready = false;
 
        if (ring->funcs->support_64bit_ptrs) {
-               amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs);
-               amdgpu_wb_free_64bit(ring->adev, ring->fence_offs);
                amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs);
                amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs);
        } else {
-               amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
-               amdgpu_wb_free(ring->adev, ring->fence_offs);
                amdgpu_wb_free(ring->adev, ring->rptr_offs);
                amdgpu_wb_free(ring->adev, ring->wptr_offs);
        }
 
+       amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
+       if (amdgpu_sriov_vf(ring->adev) && ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+               amdgpu_wb_free_256bit(ring->adev, ring->fence_offs);
+       else
+               amdgpu_wb_free(ring->adev, ring->fence_offs);
 
        amdgpu_bo_free_kernel(&ring->ring_obj,
                              &ring->gpu_addr,
index bc8dec992f73d5da7a4d8742fd3d7bc110cd0312..322d25299a00cf364fba3b8ad4343ffa22b3efb5 100644 (file)
@@ -212,4 +212,44 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 
 }
 
+static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
+{
+       if (ring->count_dw <= 0)
+               DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+       ring->ring[ring->wptr++ & ring->buf_mask] = v;
+       ring->wptr &= ring->ptr_mask;
+       ring->count_dw--;
+}
+
+static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+                                             void *src, int count_dw)
+{
+       unsigned occupied, chunk1, chunk2;
+       void *dst;
+
+       if (unlikely(ring->count_dw < count_dw))
+               DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
+       occupied = ring->wptr & ring->buf_mask;
+       dst = (void *)&ring->ring[occupied];
+       chunk1 = ring->buf_mask + 1 - occupied;
+       chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+       chunk2 = count_dw - chunk1;
+       chunk1 <<= 2;
+       chunk2 <<= 2;
+
+       if (chunk1)
+               memcpy(dst, src, chunk1);
+
+       if (chunk2) {
+               src += chunk1;
+               dst = (void *)ring->ring;
+               memcpy(dst, src, chunk2);
+       }
+
+       ring->wptr += count_dw;
+       ring->wptr &= ring->ptr_mask;
+       ring->count_dw -= count_dw;
+}
+
 #endif
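
Worked example for the chunk1/chunk2 split in amdgpu_ring_write_multiple(): on a 256-dword ring (buf_mask = 0xff) with wptr = 250 and count_dw = 10, occupied = 250, chunk1 = 256 - 250 = 6 dwords memcpy'd to the tail of the buffer, and chunk2 = 10 - 6 = 4 dwords memcpy'd to the start; wptr then advances to 260 and is wrapped by ptr_mask.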
index 15510dadde018d87d455b371af32e8d349c3a6e1..3c4d7574d704ea5915d0195043715676a33a3a04 100644 (file)
@@ -33,7 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        struct amdgpu_bo *vram_obj = NULL;
        struct amdgpu_bo **gtt_obj = NULL;
-       uint64_t gtt_addr, vram_addr;
+       uint64_t gart_addr, vram_addr;
        unsigned n, size;
        int i, r;
 
@@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
        /* Number of tests =
         * (Total GTT - IB pool - writeback page - ring buffers) / test size
         */
-       n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
+       n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
                if (adev->rings[i])
                        n -= adev->rings[i]->ring_size;
@@ -76,7 +76,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
        }
        for (i = 0; i < n; i++) {
                void *gtt_map, *vram_map;
-               void **gtt_start, **gtt_end;
+               void **gart_start, **gart_end;
                void **vram_start, **vram_end;
                struct dma_fence *fence = NULL;
 
@@ -91,7 +91,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
                r = amdgpu_bo_reserve(gtt_obj[i], false);
                if (unlikely(r != 0))
                        goto out_lclean_unref;
-               r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
+               r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr);
                if (r) {
                        DRM_ERROR("Failed to pin GTT object %d\n", i);
                        goto out_lclean_unres;
@@ -103,15 +103,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
                        goto out_lclean_unpin;
                }
 
-               for (gtt_start = gtt_map, gtt_end = gtt_map + size;
-                    gtt_start < gtt_end;
-                    gtt_start++)
-                       *gtt_start = gtt_start;
+               for (gart_start = gtt_map, gart_end = gtt_map + size;
+                    gart_start < gart_end;
+                    gart_start++)
+                       *gart_start = gart_start;
 
                amdgpu_bo_kunmap(gtt_obj[i]);
 
-               r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
-                                      size, NULL, &fence, false);
+               r = amdgpu_copy_buffer(ring, gart_addr, vram_addr,
+                                      size, NULL, &fence, false, false);
 
                if (r) {
                        DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
@@ -132,21 +132,21 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
                        goto out_lclean_unpin;
                }
 
-               for (gtt_start = gtt_map, gtt_end = gtt_map + size,
+               for (gart_start = gtt_map, gart_end = gtt_map + size,
                     vram_start = vram_map, vram_end = vram_map + size;
                     vram_start < vram_end;
-                    gtt_start++, vram_start++) {
-                       if (*vram_start != gtt_start) {
+                    gart_start++, vram_start++) {
+                       if (*vram_start != gart_start) {
                                DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
                                          "expected 0x%p (GTT/VRAM offset "
                                          "0x%16llx/0x%16llx)\n",
-                                         i, *vram_start, gtt_start,
+                                         i, *vram_start, gart_start,
                                          (unsigned long long)
-                                         (gtt_addr - adev->mc.gtt_start +
-                                          (void*)gtt_start - gtt_map),
+                                         (gart_addr - adev->mc.gart_start +
+                                          (void*)gart_start - gtt_map),
                                          (unsigned long long)
                                          (vram_addr - adev->mc.vram_start +
-                                          (void*)gtt_start - gtt_map));
+                                          (void*)gart_start - gtt_map));
                                amdgpu_bo_kunmap(vram_obj);
                                goto out_lclean_unpin;
                        }
@@ -155,8 +155,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 
                amdgpu_bo_kunmap(vram_obj);
 
-               r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
-                                      size, NULL, &fence, false);
+               r = amdgpu_copy_buffer(ring, vram_addr, gart_addr,
+                                      size, NULL, &fence, false, false);
 
                if (r) {
                        DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
@@ -177,20 +177,20 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
                        goto out_lclean_unpin;
                }
 
-               for (gtt_start = gtt_map, gtt_end = gtt_map + size,
+               for (gart_start = gtt_map, gart_end = gtt_map + size,
                     vram_start = vram_map, vram_end = vram_map + size;
-                    gtt_start < gtt_end;
-                    gtt_start++, vram_start++) {
-                       if (*gtt_start != vram_start) {
+                    gart_start < gart_end;
+                    gart_start++, vram_start++) {
+                       if (*gart_start != vram_start) {
                                DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
                                          "expected 0x%p (VRAM/GTT offset "
                                          "0x%16llx/0x%16llx)\n",
-                                         i, *gtt_start, vram_start,
+                                         i, *gart_start, vram_start,
                                          (unsigned long long)
                                          (vram_addr - adev->mc.vram_start +
                                           (void*)vram_start - vram_map),
                                          (unsigned long long)
-                                         (gtt_addr - adev->mc.gtt_start +
+                                         (gart_addr - adev->mc.gart_start +
                                           (void*)vram_start - vram_map));
                                amdgpu_bo_kunmap(gtt_obj[i]);
                                goto out_lclean_unpin;
@@ -200,7 +200,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
                amdgpu_bo_kunmap(gtt_obj[i]);
 
                DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
-                        gtt_addr - adev->mc.gtt_start);
+                        gart_addr - adev->mc.gart_start);
                continue;
 
 out_lclean_unpin:
index 8601904e670ae7cf094a341c19abb76404fb3ec6..509f7a63d40ce86679ccf071eb6639798180727b 100644 (file)
@@ -224,7 +224,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
                             __field(long, start)
                             __field(long, last)
                             __field(u64, offset)
-                            __field(u32, flags)
+                            __field(u64, flags)
                             ),
 
            TP_fast_assign(
@@ -234,7 +234,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
                           __entry->offset = mapping->offset;
                           __entry->flags = mapping->flags;
                           ),
-           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
                      __entry->bo, __entry->start, __entry->last,
                      __entry->offset, __entry->flags)
 );
@@ -248,7 +248,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
                             __field(long, start)
                             __field(long, last)
                             __field(u64, offset)
-                            __field(u32, flags)
+                            __field(u64, flags)
                             ),
 
            TP_fast_assign(
@@ -258,7 +258,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
                           __entry->offset = mapping->offset;
                           __entry->flags = mapping->flags;
                           ),
-           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
+           TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
                      __entry->bo, __entry->start, __entry->last,
                      __entry->offset, __entry->flags)
 );
@@ -269,7 +269,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
            TP_STRUCT__entry(
                             __field(u64, soffset)
                             __field(u64, eoffset)
-                            __field(u32, flags)
+                            __field(u64, flags)
                             ),
 
            TP_fast_assign(
@@ -277,7 +277,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
                           __entry->eoffset = mapping->last + 1;
                           __entry->flags = mapping->flags;
                           ),
-           TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+           TP_printk("soffs=%010llx, eoffs=%010llx, flags=%llx",
                      __entry->soffset, __entry->eoffset, __entry->flags)
 );
 
@@ -293,14 +293,14 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
 
 TRACE_EVENT(amdgpu_vm_set_ptes,
            TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
-                    uint32_t incr, uint32_t flags),
+                    uint32_t incr, uint64_t flags),
            TP_ARGS(pe, addr, count, incr, flags),
            TP_STRUCT__entry(
                             __field(u64, pe)
                             __field(u64, addr)
                             __field(u32, count)
                             __field(u32, incr)
-                            __field(u32, flags)
+                            __field(u64, flags)
                             ),
 
            TP_fast_assign(
@@ -310,7 +310,7 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
                           __entry->incr = incr;
                           __entry->flags = flags;
                           ),
-           TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u",
+           TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u",
                      __entry->pe, __entry->addr, __entry->incr,
                      __entry->flags, __entry->count)
 );
index c9b131b13ef74de91a85afa562d2b2446c59a1bd..e6f9a54c959ddff1413ee4f212373bfb2d88a545 100644 (file)
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+                            struct ttm_mem_reg *mem, unsigned num_pages,
+                            uint64_t offset, unsigned window,
+                            struct amdgpu_ring *ring,
+                            uint64_t *addr);
+
 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
 
-
 /*
  * Global memory.
  */
@@ -97,6 +102,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
                goto error_bo;
        }
 
+       mutex_init(&adev->mman.gtt_window_lock);
+
        ring = adev->mman.buffer_funcs_ring;
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
@@ -123,6 +130,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
        if (adev->mman.mem_global_referenced) {
                amd_sched_entity_fini(adev->mman.entity.sched,
                                      &adev->mman.entity);
+               mutex_destroy(&adev->mman.gtt_window_lock);
                drm_global_item_unref(&adev->mman.bo_global_ref.ref);
                drm_global_item_unref(&adev->mman.mem_global_ref);
                adev->mman.mem_global_referenced = false;
@@ -150,7 +158,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
                break;
        case TTM_PL_TT:
                man->func = &amdgpu_gtt_mgr_func;
-               man->gpu_offset = adev->mc.gtt_start;
+               man->gpu_offset = adev->mc.gart_start;
                man->available_caching = TTM_PL_MASK_CACHING;
                man->default_caching = TTM_PL_FLAG_CACHED;
                man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -186,12 +194,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
        struct amdgpu_bo *abo;
-       static struct ttm_place placements = {
+       static const struct ttm_place placements = {
                .fpfn = 0,
                .lpfn = 0,
                .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
        };
-       unsigned i;
 
        if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
                placement->placement = &placements;
@@ -207,22 +214,36 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
                    adev->mman.buffer_funcs_ring &&
                    adev->mman.buffer_funcs_ring->ready == false) {
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+               } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                          !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+                       unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+                       struct drm_mm_node *node = bo->mem.mm_node;
+                       unsigned long pages_left;
+
+                       for (pages_left = bo->mem.num_pages;
+                            pages_left;
+                            pages_left -= node->size, node++) {
+                               if (node->start < fpfn)
+                                       break;
+                       }
+
+                       if (!pages_left)
+                               goto gtt;
+
+                       /* Try evicting to the CPU inaccessible part of VRAM
+                        * first, but only set GTT as busy placement, so this
+                        * BO will be evicted to GTT rather than causing other
+                        * BOs to be evicted from VRAM
+                        */
+                       amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+                                                        AMDGPU_GEM_DOMAIN_GTT);
+                       abo->placements[0].fpfn = fpfn;
+                       abo->placements[0].lpfn = 0;
+                       abo->placement.busy_placement = &abo->placements[1];
+                       abo->placement.num_busy_placement = 1;
                } else {
+gtt:
                        amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
-                       for (i = 0; i < abo->placement.num_placement; ++i) {
-                               if (!(abo->placements[i].flags &
-                                     TTM_PL_FLAG_TT))
-                                       continue;
-
-                               if (abo->placements[i].lpfn)
-                                       continue;
-
-                               /* set an upper limit to force directly
-                                * allocating address space for the BO.
-                                */
-                               abo->placements[i].lpfn =
-                                       adev->mc.gtt_size >> PAGE_SHIFT;
-                       }
                }
                break;
        case TTM_PL_TT:
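
The eviction decision above hinges on whether any drm_mm node of the BO still overlaps CPU-visible VRAM. A minimal userspace sketch of that overlap test, assuming 4 KiB pages and an invented node layout (an illustration, not the driver code itself):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct mm_node { unsigned long start, size; }; /* both in pages */

/* True when at least one node starts below the first CPU-invisible
 * page frame number, i.e. part of the BO is in visible VRAM. */
static bool overlaps_visible(const struct mm_node *node,
                             unsigned long num_pages,
                             unsigned long visible_bytes)
{
        unsigned long fpfn = visible_bytes >> PAGE_SHIFT;
        unsigned long left;

        for (left = num_pages; left; left -= node->size, node++)
                if (node->start < fpfn)
                        return true;
        return false;
}

int main(void)
{
        struct mm_node nodes[] = { { 70000, 16 }, { 1000, 16 } };

        /* 256 MiB of visible VRAM -> fpfn = 65536; the second node overlaps */
        printf("%d\n", overlaps_visible(nodes, 32, 256UL << 20));
        return 0;
}
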
@@ -252,29 +273,18 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
        new_mem->mm_node = NULL;
 }
 
-static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
-                              struct drm_mm_node *mm_node,
-                              struct ttm_mem_reg *mem,
-                              uint64_t *addr)
+static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
+                                   struct drm_mm_node *mm_node,
+                                   struct ttm_mem_reg *mem)
 {
-       int r;
-
-       switch (mem->mem_type) {
-       case TTM_PL_TT:
-               r = amdgpu_ttm_bind(bo, mem);
-               if (r)
-                       return r;
+       uint64_t addr = 0;
 
-       case TTM_PL_VRAM:
-               *addr = mm_node->start << PAGE_SHIFT;
-               *addr += bo->bdev->man[mem->mem_type].gpu_offset;
-               break;
-       default:
-               DRM_ERROR("Unknown placement %d\n", mem->mem_type);
-               return -EINVAL;
+       if (mem->mem_type != TTM_PL_TT ||
+           amdgpu_gtt_mgr_is_allocated(mem)) {
+               addr = mm_node->start << PAGE_SHIFT;
+               addr += bo->bdev->man[mem->mem_type].gpu_offset;
        }
-
-       return 0;
+       return addr;
 }
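
The reworked helper returns a real GPU address only when one exists: VRAM nodes always have one, while a GTT node that has not been given address space yet yields 0, telling the caller to go through a transfer window. A userspace sketch of that contract, with a plain flag standing in for amdgpu_gtt_mgr_is_allocated():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* start_page: node offset in pages; gpu_offset: base of the memory domain */
static uint64_t node_addr(uint64_t start_page, uint64_t gpu_offset,
                          bool is_gtt, bool gtt_allocated)
{
        if (is_gtt && !gtt_allocated)
                return 0; /* no address yet; map through a GTT window */
        return (start_page << PAGE_SHIFT) + gpu_offset;
}

int main(void)
{
        /* VRAM node: a real address */
        printf("%#llx\n", (unsigned long long)node_addr(16, 0x8000000, false, false));
        /* unallocated GTT node: 0 */
        printf("%#llx\n", (unsigned long long)node_addr(16, 0, true, false));
        return 0;
}
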
 
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
@@ -299,26 +309,40 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
        }
 
        old_mm = old_mem->mm_node;
-       r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
-       if (r)
-               return r;
        old_size = old_mm->size;
-
+       old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
 
        new_mm = new_mem->mm_node;
-       r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
-       if (r)
-               return r;
        new_size = new_mm->size;
+       new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
 
        num_pages = new_mem->num_pages;
+       mutex_lock(&adev->mman.gtt_window_lock);
        while (num_pages) {
-               unsigned long cur_pages = min(old_size, new_size);
+               unsigned long cur_pages = min(min(old_size, new_size),
+                                             (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
+               uint64_t from = old_start, to = new_start;
                struct dma_fence *next;
 
-               r = amdgpu_copy_buffer(ring, old_start, new_start,
+               if (old_mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(old_mem)) {
+                       r = amdgpu_map_buffer(bo, old_mem, cur_pages,
+                                             old_start, 0, ring, &from);
+                       if (r)
+                               goto error;
+               }
+
+               if (new_mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(new_mem)) {
+                       r = amdgpu_map_buffer(bo, new_mem, cur_pages,
+                                             new_start, 1, ring, &to);
+                       if (r)
+                               goto error;
+               }
+
+               r = amdgpu_copy_buffer(ring, from, to,
                                       cur_pages * PAGE_SIZE,
-                                      bo->resv, &next, false);
+                                      bo->resv, &next, false, true);
                if (r)
                        goto error;
 
@@ -331,10 +355,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
                old_size -= cur_pages;
                if (!old_size) {
-                       r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
-                                               &old_start);
-                       if (r)
-                               goto error;
+                       old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
                        old_size = old_mm->size;
                } else {
                        old_start += cur_pages * PAGE_SIZE;
@@ -342,22 +363,21 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
                new_size -= cur_pages;
                if (!new_size) {
-                       r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
-                                               &new_start);
-                       if (r)
-                               goto error;
-
+                       new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
                        new_size = new_mm->size;
                } else {
                        new_start += cur_pages * PAGE_SIZE;
                }
        }
+       mutex_unlock(&adev->mman.gtt_window_lock);
 
        r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
        dma_fence_put(fence);
        return r;
 
 error:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
        if (fence)
                dma_fence_wait(fence, false);
        dma_fence_put(fence);
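
With the window path in place, each blit in the loop above is capped by three limits at once: the pages remaining in the current source node, the pages remaining in the current destination node, and the 512-page transfer window. A userspace sketch of that chunking with invented node sizes:

#include <stdio.h>

#define MAX_TRANSFER 512UL /* pages, AMDGPU_GTT_MAX_TRANSFER_SIZE */

static unsigned long min3(unsigned long a, unsigned long b, unsigned long c)
{
        unsigned long m = a < b ? a : b;
        return m < c ? m : c;
}

int main(void)
{
        unsigned long old_nodes[] = { 1000, 700 }, new_nodes[] = { 600, 1100 };
        unsigned long old_left = old_nodes[0], new_left = new_nodes[0];
        unsigned long num_pages = 1700, oi = 0, ni = 0;

        while (num_pages) {
                unsigned long cur = min3(old_left, new_left, MAX_TRANSFER);

                printf("copy %lu pages\n", cur);
                num_pages -= cur;
                old_left -= cur;
                new_left -= cur;
                if (!old_left && ++oi < 2) /* advance to the next node */
                        old_left = old_nodes[oi];
                if (!new_left && ++ni < 2)
                        new_left = new_nodes[ni];
        }
        return 0;
}
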
@@ -384,7 +404,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
-       placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
+       placements.lpfn = 0;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
@@ -431,7 +451,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
        placement.num_busy_placement = 1;
        placement.busy_placement = &placements;
        placements.fpfn = 0;
-       placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
+       placements.lpfn = 0;
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
                             interruptible, no_wait_gpu);
@@ -507,6 +527,15 @@ memcpy:
                }
        }
 
+       if (bo->type == ttm_bo_type_device &&
+           new_mem->mem_type == TTM_PL_VRAM &&
+           old_mem->mem_type != TTM_PL_VRAM) {
+               /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
+                * accesses the BO after it's moved.
+                */
+               abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+       }
+
        /* update statistics */
        atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
        return 0;
@@ -695,6 +724,31 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
        sg_free_table(ttm->sg);
 }
 
+static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+{
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       uint64_t flags;
+       int r;
+
+       spin_lock(&gtt->adev->gtt_list_lock);
+       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
+       gtt->offset = (u64)mem->start << PAGE_SHIFT;
+       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+               ttm->pages, gtt->ttm.dma_address, flags);
+
+       if (r) {
+               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+                         ttm->num_pages, gtt->offset);
+               goto error_gart_bind;
+       }
+
+       list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+error_gart_bind:
+       spin_unlock(&gtt->adev->gtt_list_lock);
+       return r;
+}
+
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
                                   struct ttm_mem_reg *bo_mem)
 {
@@ -718,7 +772,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
            bo_mem->mem_type == AMDGPU_PL_OA)
                return -EINVAL;
 
-       return 0;
+       if (amdgpu_gtt_mgr_is_allocated(bo_mem))
+               r = amdgpu_ttm_do_bind(ttm, bo_mem);
+
+       return r;
 }
 
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
@@ -731,8 +788,6 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 {
        struct ttm_tt *ttm = bo->ttm;
-       struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
-       uint64_t flags;
        int r;
 
        if (!ttm || amdgpu_ttm_is_bound(ttm))
@@ -745,22 +800,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
                return r;
        }
 
-       spin_lock(&gtt->adev->gtt_list_lock);
-       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
-       gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
-               ttm->pages, gtt->ttm.dma_address, flags);
-
-       if (r) {
-               DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-                         ttm->num_pages, gtt->offset);
-               goto error_gart_bind;
-       }
-
-       list_add_tail(&gtt->list, &gtt->adev->gtt_list);
-error_gart_bind:
-       spin_unlock(&gtt->adev->gtt_list_lock);
-       return r;
+       return amdgpu_ttm_do_bind(ttm, bo_mem);
 }
 
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -1075,6 +1115,67 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
        return ttm_bo_eviction_valuable(bo, place);
 }
 
+static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
+                                   unsigned long offset,
+                                   void *buf, int len, int write)
+{
+       struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+       struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+       struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+       uint32_t value = 0;
+       int ret = 0;
+       uint64_t pos;
+       unsigned long flags;
+
+       if (bo->mem.mem_type != TTM_PL_VRAM)
+               return -EIO;
+
+       while (offset >= (nodes->size << PAGE_SHIFT)) {
+               offset -= nodes->size << PAGE_SHIFT;
+               ++nodes;
+       }
+       pos = (nodes->start << PAGE_SHIFT) + offset;
+
+       while (len && pos < adev->mc.mc_vram_size) {
+               uint64_t aligned_pos = pos & ~(uint64_t)3;
+               uint32_t bytes = 4 - (pos & 3);
+               uint32_t shift = (pos & 3) * 8;
+               uint32_t mask = 0xffffffff << shift;
+
+               if (len < bytes) {
+                       mask &= 0xffffffff >> (bytes - len) * 8;
+                       bytes = len;
+               }
+
+               spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+               WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+               WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+               if (!write || mask != 0xffffffff)
+                       value = RREG32(mmMM_DATA);
+               if (write) {
+                       value &= ~mask;
+                       value |= (*(uint32_t *)buf << shift) & mask;
+                       WREG32(mmMM_DATA, value);
+               }
+               spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+               if (!write) {
+                       value = (value & mask) >> shift;
+                       memcpy(buf, &value, bytes);
+               }
+
+               ret += bytes;
+               buf = (uint8_t *)buf + bytes;
+               pos += bytes;
+               len -= bytes;
+               if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+                       ++nodes;
+                       pos = (nodes->start << PAGE_SHIFT);
+               }
+       }
+
+       return ret;
+}
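
The new callback gives ptrace-style access to BOs currently resident in VRAM: the MM_INDEX/MM_INDEX_HI/MM_DATA register pair exposes a 4-byte window into the aperture, and partial or unaligned accesses are folded into a masked read-modify-write. A self-contained sketch of just the masking arithmetic, with a little-endian array standing in for the VRAM behind mmMM_DATA:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t vram[4]; /* fake backing store behind the data window */

static void window_write(uint64_t pos, const void *buf, int len)
{
        while (len) {
                uint64_t aligned = pos & ~(uint64_t)3;
                uint32_t bytes = 4 - (pos & 3);
                uint32_t shift = (pos & 3) * 8;
                uint32_t mask = 0xffffffff << shift;
                uint32_t value, src = 0;

                if (len < (int)bytes) {
                        mask &= 0xffffffff >> (bytes - len) * 8;
                        bytes = len;
                }
                memcpy(&src, buf, bytes);

                value = vram[aligned / 4];              /* RREG32(mmMM_DATA) */
                value = (value & ~mask) | ((src << shift) & mask);
                vram[aligned / 4] = value;              /* WREG32(mmMM_DATA) */

                buf = (const uint8_t *)buf + bytes;
                pos += bytes;
                len -= bytes;
        }
}

int main(void)
{
        uint8_t data[] = { 0xAA, 0xBB, 0xCC };

        window_write(3, data, 3); /* crosses a dword boundary */
        printf("%08x %08x\n", vram[0], vram[1]); /* aa000000 0000ccbb */
        return 0;
}
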
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
        .ttm_tt_create = &amdgpu_ttm_tt_create,
        .ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1090,11 +1191,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
        .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
        .io_mem_free = &amdgpu_ttm_io_mem_free,
        .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+       .access_memory = &amdgpu_ttm_access_memory
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
+       uint64_t gtt_size;
        int r;
+       u64 vis_vram_limit;
 
        r = amdgpu_ttm_global_init(adev);
        if (r) {
@@ -1118,6 +1222,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                DRM_ERROR("Failed initializing VRAM heap.\n");
                return r;
        }
+
+       /* Reduce size of CPU-visible VRAM if requested */
+       vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
+       if (amdgpu_vis_vram_limit > 0 &&
+           vis_vram_limit <= adev->mc.visible_vram_size)
+               adev->mc.visible_vram_size = vis_vram_limit;
+
        /* Change the size here instead of the init above so only lpfn is affected */
        amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
@@ -1140,14 +1251,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        }
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
-       r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT,
-                               adev->mc.gtt_size >> PAGE_SHIFT);
+
+       if (amdgpu_gtt_size == -1)
+               gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+                              adev->mc.mc_vram_size);
+       else
+               gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+       r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
        if (r) {
                DRM_ERROR("Failed initializing GTT heap.\n");
                return r;
        }
        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
-                (unsigned)(adev->mc.gtt_size / (1024 * 1024)));
+                (unsigned)(gtt_size / (1024 * 1024)));
 
        adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
        adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
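
The TTM GTT pool is now sized independently of the hardware GART: by default it becomes max(3 GiB, VRAM size), overridable in megabytes through the amdgpu_gtt_size module parameter. A userspace sketch of that rule, assuming the 3072 MB AMDGPU_DEFAULT_GTT_SIZE_MB default:

#include <stdint.h>
#include <stdio.h>

static uint64_t gtt_bytes(long gtt_size_mb, uint64_t vram_bytes)
{
        if (gtt_size_mb == -1) { /* auto */
                uint64_t def = 3072ULL << 20;
                return def > vram_bytes ? def : vram_bytes;
        }
        return (uint64_t)gtt_size_mb << 20;
}

int main(void)
{
        printf("%llu MiB\n", (unsigned long long)(gtt_bytes(-1, 2ULL << 30) >> 20));   /* 3072 */
        printf("%llu MiB\n", (unsigned long long)(gtt_bytes(-1, 8ULL << 30) >> 20));   /* 8192 */
        printf("%llu MiB\n", (unsigned long long)(gtt_bytes(1024, 8ULL << 30) >> 20)); /* 1024 */
        return 0;
}
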
@@ -1256,12 +1372,77 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
        return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
 }
 
-int amdgpu_copy_buffer(struct amdgpu_ring *ring,
-                      uint64_t src_offset,
-                      uint64_t dst_offset,
-                      uint32_t byte_count,
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+                            struct ttm_mem_reg *mem, unsigned num_pages,
+                            uint64_t offset, unsigned window,
+                            struct amdgpu_ring *ring,
+                            uint64_t *addr)
+{
+       struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+       struct amdgpu_device *adev = ring->adev;
+       struct ttm_tt *ttm = bo->ttm;
+       struct amdgpu_job *job;
+       unsigned num_dw, num_bytes;
+       dma_addr_t *dma_address;
+       struct dma_fence *fence;
+       uint64_t src_addr, dst_addr;
+       uint64_t flags;
+       int r;
+
+       BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+              AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+       *addr = adev->mc.gart_start;
+       *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+               AMDGPU_GPU_PAGE_SIZE;
+
+       num_dw = adev->mman.buffer_funcs->copy_num_dw;
+       while (num_dw & 0x7)
+               num_dw++;
+
+       num_bytes = num_pages * 8;
+
+       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
+       if (r)
+               return r;
+
+       src_addr = num_dw * 4;
+       src_addr += job->ibs[0].gpu_addr;
+
+       dst_addr = adev->gart.table_addr;
+       dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+       amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+                               dst_addr, num_bytes);
+
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       WARN_ON(job->ibs[0].length_dw > num_dw);
+
+       dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
+       flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
+       r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+                           &job->ibs[0].ptr[num_dw]);
+       if (r)
+               goto error_free;
+
+       r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+                             AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+       if (r)
+               goto error_free;
+
+       dma_fence_put(fence);
+
+       return r;
+
+error_free:
+       amdgpu_job_free(job);
+       return r;
+}
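
Each of the two transfer windows is a fixed 512-GPU-page slot at the start of the GART, so both the window's GPU virtual address and the location of its PTEs in the GART table follow directly from the window index. A userspace sketch of that address math with invented base addresses:

#include <stdint.h>
#include <stdio.h>

#define WINDOW_PAGES 512ULL  /* AMDGPU_GTT_MAX_TRANSFER_SIZE */
#define GPU_PAGE     4096ULL /* AMDGPU_GPU_PAGE_SIZE */

int main(void)
{
        uint64_t gart_start = 0x40000000ULL; /* hypothetical adev->mc.gart_start */
        uint64_t table_addr = 0x00100000ULL; /* hypothetical adev->gart.table_addr */
        unsigned window;

        for (window = 0; window < 2; window++) {
                uint64_t va  = gart_start + window * WINDOW_PAGES * GPU_PAGE;
                uint64_t pte = table_addr + window * WINDOW_PAGES * 8; /* 8-byte PTEs */

                printf("window %u: VA %#llx, PTEs at %#llx\n", window,
                       (unsigned long long)va, (unsigned long long)pte);
        }
        return 0;
}
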
+
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+                      uint64_t dst_offset, uint32_t byte_count,
                       struct reservation_object *resv,
-                      struct dma_fence **fence, bool direct_submit)
+                      struct dma_fence **fence, bool direct_submit,
+                      bool vm_needs_flush)
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_job *job;
@@ -1283,6 +1464,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
        if (r)
                return r;
 
+       job->vm_needs_flush = vm_needs_flush;
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
                                     AMDGPU_FENCE_OWNER_UNDEFINED);
@@ -1347,6 +1529,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                return -EINVAL;
        }
 
+       if (bo->tbo.mem.mem_type == TTM_PL_TT) {
+               r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
+               if (r)
+                       return r;
+       }
+
        num_pages = bo->tbo.num_pages;
        mm_node = bo->tbo.mem.mm_node;
        num_loops = 0;
@@ -1382,11 +1570,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                uint32_t byte_count = mm_node->size << PAGE_SHIFT;
                uint64_t dst_addr;
 
-               r = amdgpu_mm_node_addr(&bo->tbo, mm_node,
-                                       &bo->tbo.mem, &dst_addr);
-               if (r)
-                       return r;
-
+               dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
                while (byte_count) {
                        uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
@@ -1574,7 +1758,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
                                  adev, &amdgpu_ttm_gtt_fops);
        if (IS_ERR(ent))
                return PTR_ERR(ent);
-       i_size_write(ent->d_inode, adev->mc.gtt_size);
+       i_size_write(ent->d_inode, adev->mc.gart_size);
        adev->mman.gtt = ent;
 
 #endif
index 6bdede8ff12b4c37f1e679de83f7afd54014cc0b..f137c2458ee874a43b8620b1c32879b3b31b0c3e 100644 (file)
@@ -34,6 +34,9 @@
 #define AMDGPU_PL_FLAG_GWS             (TTM_PL_FLAG_PRIV << 1)
 #define AMDGPU_PL_FLAG_OA              (TTM_PL_FLAG_PRIV << 2)
 
+#define AMDGPU_GTT_MAX_TRANSFER_SIZE   512
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS        2
+
 struct amdgpu_mman {
        struct ttm_bo_global_ref        bo_global_ref;
        struct drm_global_reference     mem_global_ref;
@@ -49,6 +52,8 @@ struct amdgpu_mman {
        /* buffer handling */
        const struct amdgpu_buffer_funcs        *buffer_funcs;
        struct amdgpu_ring                      *buffer_funcs_ring;
+
+       struct mutex                            gtt_window_lock;
        /* Scheduler entity for buffer moves */
        struct amd_sched_entity                 entity;
 };
@@ -56,17 +61,17 @@ struct amdgpu_mman {
 extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
 extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
 
+bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
 int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
                         struct ttm_buffer_object *tbo,
                         const struct ttm_place *place,
                         struct ttm_mem_reg *mem);
 
-int amdgpu_copy_buffer(struct amdgpu_ring *ring,
-                      uint64_t src_offset,
-                      uint64_t dst_offset,
-                      uint32_t byte_count,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+                      uint64_t dst_offset, uint32_t byte_count,
                       struct reservation_object *resv,
-                      struct dma_fence **fence, bool direct_submit);
+                      struct dma_fence **fence, bool direct_submit,
+                      bool vm_needs_flush);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct reservation_object *resv,
@@ -75,5 +80,6 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
+int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
 
 #endif
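
For callers, the visible change in this header is the widened amdgpu_copy_buffer() signature. A hypothetical call-site fragment (kernel context assumed; ring, src, dst, num_bytes and resv are placeholder names, not identifiers from this patch) showing the two trailing flags:

        struct dma_fence *fence = NULL;
        int r;

        /* direct_submit = false: go through the scheduler entity;
         * vm_needs_flush = true: flush the VM before the copy, as the
         * GTT-window path requires after rewriting GART PTEs. */
        r = amdgpu_copy_buffer(ring, src, dst, num_bytes, resv, &fence,
                               false, true);
        if (!r)
                dma_fence_put(fence);
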
index 4f50eeb6585534b54a5408360611798ccd2e447f..fcfb9d4f7477a8e833b363a9bfd419434006a50a 100644 (file)
@@ -275,14 +275,10 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
                else
                        return AMDGPU_FW_LOAD_PSP;
        case CHIP_RAVEN:
-#if 0
-               if (!load_type)
+               if (load_type != 2)
                        return AMDGPU_FW_LOAD_DIRECT;
                else
                        return AMDGPU_FW_LOAD_PSP;
-#else
-               return AMDGPU_FW_LOAD_DIRECT;
-#endif
        default:
                DRM_ERROR("Unknow firmware load type\n");
        }
@@ -377,6 +373,11 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
        struct amdgpu_firmware_info *ucode = NULL;
        const struct common_firmware_header *header = NULL;
 
+       if (!adev->firmware.fw_size) {
+               dev_warn(adev->dev, "No IP firmware needs to be loaded\n");
+               return 0;
+       }
+
        err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
                                amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
                                AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
@@ -459,6 +460,9 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
        int i;
        struct amdgpu_firmware_info *ucode = NULL;
 
+       if (!adev->firmware.fw_size)
+               return 0;
+
        for (i = 0; i < adev->firmware.max_ucodes; i++) {
                ucode = &adev->firmware.ucode[i];
                if (ucode->fw) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
new file mode 100644 (file)
index 0000000..45ac918
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vf_error.h"
+#include "mxgpu_ai.h"
+
+#define AMDGPU_VF_ERROR_ENTRY_SIZE    16
+
+/* struct amdgpu_vf_error_buffer - ring buffer of amdgpu VF error information. */
+struct amdgpu_vf_error_buffer {
+       int read_count;
+       int write_count;
+       uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+       uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+       uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+};
+
+struct amdgpu_vf_error_buffer amdgpu_vf_errors;
+
+
+void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data)
+{
+       int index;
+       uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
+
+       index = amdgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+       amdgpu_vf_errors.code[index] = error_code;
+       amdgpu_vf_errors.flags[index] = error_flags;
+       amdgpu_vf_errors.data[index] = error_data;
+       amdgpu_vf_errors.write_count++;
+}
+
+
+void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
+{
+       /* u32 pf2vf_flags = 0; */
+       u32 data1, data2, data3;
+       int index;
+
+       if (!adev || !amdgpu_sriov_vf(adev) || !adev->virt.ops ||
+           !adev->virt.ops->trans_msg)
+               return;
+/*
+       TODO: Enable this code when pf2vf_info is merged
+       AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags);
+       if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) {
+               return;
+       }
+*/
+       /* The ring buffer may have wrapped; clamp read_count so that only
+        * the newest AMDGPU_VF_ERROR_ENTRY_SIZE entries are transmitted.
+        */
+       if (amdgpu_vf_errors.write_count - amdgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+               amdgpu_vf_errors.read_count = amdgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+       }
+
+       while (amdgpu_vf_errors.read_count < amdgpu_vf_errors.write_count) {
+               index = amdgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+               data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(amdgpu_vf_errors.code[index], amdgpu_vf_errors.flags[index]);
+               data2 = amdgpu_vf_errors.data[index] & 0xFFFFFFFF;
+               data3 = (amdgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+
+               adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
+               amdgpu_vf_errors.read_count++;
+       }
+}
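
The 16-entry ring semantics are worth spelling out: writers always append, and the drain path first clamps read_count so that after an overflow only the newest AMDGPU_VF_ERROR_ENTRY_SIZE entries are transmitted. A userspace sketch:

#include <stdio.h>

#define ENTRY_SIZE 16

static unsigned short code[ENTRY_SIZE];
static int read_count, write_count;

static void put(unsigned short c)
{
        code[write_count % ENTRY_SIZE] = c;
        write_count++;
}

int main(void)
{
        int i;

        for (i = 0; i < 20; i++) /* overflows the ring by four entries */
                put(i);

        if (write_count - read_count > ENTRY_SIZE)
                read_count = write_count - ENTRY_SIZE;

        while (read_count < write_count)
                printf("%d ", code[read_count++ % ENTRY_SIZE]);
        printf("\n"); /* prints 4 .. 19 */
        return 0;
}
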
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
new file mode 100644 (file)
index 0000000..2a3278e
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VF_ERROR_H__
+#define __VF_ERROR_H__
+
+#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c, f)   ((((c) & 0xFFFF) << 16) | ((f) & 0xFFFF))
+#define AMDGIM_ERROR_CODE(t, c)      ((((t) & 0xF) << 12) | ((c) & 0xFFF))
+
+/* Please keep enum same as AMD GIM driver */
+enum AMDGIM_ERROR_VF {
+       AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL = 0,
+       AMDGIM_ERROR_VF_NO_VBIOS,
+       AMDGIM_ERROR_VF_GPU_POST_ERROR,
+       AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL,
+       AMDGIM_ERROR_VF_FENCE_INIT_FAIL,
+
+       AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL,
+       AMDGIM_ERROR_VF_IB_INIT_FAIL,
+       AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL,
+       AMDGIM_ERROR_VF_ASIC_RESUME_FAIL,
+       AMDGIM_ERROR_VF_GPU_RESET_FAIL,
+
+       AMDGIM_ERROR_VF_TEST,
+       AMDGIM_ERROR_VF_MAX
+};
+
+enum AMDGIM_ERROR_CATEGORY {
+       AMDGIM_ERROR_CATEGORY_NON_USED = 0,
+       AMDGIM_ERROR_CATEGORY_GIM,
+       AMDGIM_ERROR_CATEGORY_PF,
+       AMDGIM_ERROR_CATEGORY_VF,
+       AMDGIM_ERROR_CATEGORY_VBIOS,
+       AMDGIM_ERROR_CATEGORY_MONITOR,
+
+       AMDGIM_ERROR_CATEGORY_MAX
+};
+
+void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data);
+void amdgpu_vf_error_trans_all(struct amdgpu_device *adev);
+
+#endif /* __VF_ERROR_H__ */
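
The two macros pack an error into mailbox-sized pieces: the category lands in the top nibble of the 16-bit code, the sub-code in the low 12 bits, and code plus flags are then fused into one 32-bit mailbox word. A userspace sketch using copies of the macros above:

#include <stdint.h>
#include <stdio.h>

#define ERROR_CODE(t, c) ((((t) & 0xF) << 12) | ((c) & 0xFFF))
#define TO_MAILBOX(c, f) ((((c) & 0xFFFF) << 16) | ((f) & 0xFFFF))

int main(void)
{
        /* category 3 = AMDGIM_ERROR_CATEGORY_VF,
         * sub-code 9 = AMDGIM_ERROR_VF_GPU_RESET_FAIL */
        uint16_t code = ERROR_CODE(3, 9);

        printf("code=%#x mailbox=%#x\n", code, TO_MAILBOX(code, 0x1));
        /* code=0x3009 mailbox=0x30090001 */
        return 0;
}
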
index 9e1062edb76eb0649a0a97291c320222fd21bfa9..e5b1baf387c1ffcd5a0b3c5546b697b94c97a011 100644 (file)
@@ -43,6 +43,7 @@ struct amdgpu_virt_ops {
        int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
        int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
        int (*reset_gpu)(struct amdgpu_device *adev);
+       void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
 };
 
 /* GPU virtualization */
index 5795f81369f0fc6e68f0659c400a700091713db8..250c8e80e646bbf800143937b6c96f6e065235ca 100644 (file)
@@ -77,8 +77,6 @@ struct amdgpu_pte_update_params {
        void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
                     uint64_t addr, unsigned count, uint32_t incr,
                     uint64_t flags);
-       /* indicate update pt or its shadow */
-       bool shadow;
        /* The next two are used during VM update by CPU
         *  DMA addresses to use for mapping
         *  Kernel pointer of PD/PT BO that needs to be updated
@@ -161,11 +159,17 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
                                    int (*validate)(void *, struct amdgpu_bo *),
-                                   void *param)
+                                   void *param, bool use_cpu_for_update)
 {
        unsigned i;
        int r;
 
+       if (use_cpu_for_update) {
+               r = amdgpu_bo_kmap(parent->bo, NULL);
+               if (r)
+                       return r;
+       }
+
        if (!parent->entries)
                return 0;
 
@@ -183,7 +187,8 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
                 * Recurse into the sub directory. This is harmless because we
                 * have only a maximum of 5 layers.
                 */
-               r = amdgpu_vm_validate_level(entry, validate, param);
+               r = amdgpu_vm_validate_level(entry, validate, param,
+                                            use_cpu_for_update);
                if (r)
                        return r;
        }
@@ -214,7 +219,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (num_evictions == vm->last_eviction_counter)
                return 0;
 
-       return amdgpu_vm_validate_level(&vm->root, validate, param);
+       return amdgpu_vm_validate_level(&vm->root, validate, param,
+                                       vm->use_cpu_for_update);
 }
 
 /**
@@ -331,6 +337,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                        if (r)
                                return r;
 
+                       if (vm->use_cpu_for_update) {
+                               r = amdgpu_bo_kmap(pt, NULL);
+                               if (r) {
+                                       amdgpu_bo_unref(&pt);
+                                       return r;
+                               }
+                       }
+
                        /* Keep a reference to the root directory to avoid
                        * freeing them up in the wrong order.
                        */
@@ -338,6 +352,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
                        entry->bo = pt;
                        entry->addr = 0;
+                       entry->huge_page = false;
                }
 
                if (level < adev->vm_manager.num_level) {
@@ -424,7 +439,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
        struct dma_fence *updates = sync->last_vm_update;
        int r = 0;
        struct dma_fence *flushed, *tmp;
-       bool needs_flush = false;
+       bool needs_flush = vm->use_cpu_for_update;
 
        flushed  = id->flushed_updates;
        if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
@@ -545,11 +560,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
        }
        kfree(fences);
 
-       job->vm_needs_flush = false;
+       job->vm_needs_flush = vm->use_cpu_for_update;
        /* Check if we can use a VMID already assigned to this VM */
        list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
                struct dma_fence *flushed;
-               bool needs_flush = false;
+               bool needs_flush = vm->use_cpu_for_update;
 
                /* Check all the prerequisites to using this VMID */
                if (amdgpu_vm_had_gpu_reset(adev, id))
@@ -745,7 +760,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
  *
  * Emit a VM flush when it is necessary.
  */
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
 {
        struct amdgpu_device *adev = ring->adev;
        unsigned vmhub = ring->funcs->vmhub;
@@ -767,12 +782,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
                vm_flush_needed = true;
        }
 
-       if (!vm_flush_needed && !gds_switch_needed)
+       if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
                return 0;
 
        if (ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
+       if (need_pipe_sync)
+               amdgpu_ring_emit_pipeline_sync(ring);
+
        if (ring->funcs->emit_vm_flush && vm_flush_needed) {
                struct dma_fence *fence;
 
@@ -981,6 +999,8 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
        unsigned int i;
        uint64_t value;
 
+       trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+
        for (i = 0; i < count; i++) {
                value = params->pages_addr ?
                        amdgpu_vm_map_gart(params->pages_addr, addr) :
@@ -989,19 +1009,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
                                        i, value, flags);
                addr += incr;
        }
-
-       /* Flush HDP */
-       mb();
-       amdgpu_gart_flush_gpu_tlb(params->adev, 0);
 }
 
-static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                            void *owner)
 {
        struct amdgpu_sync sync;
        int r;
 
        amdgpu_sync_create(&sync);
-       amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+       amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
        r = amdgpu_sync_wait(&sync, true);
        amdgpu_sync_free(&sync);
 
@@ -1042,16 +1059,12 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
        params.adev = adev;
        shadow = parent->bo->shadow;
 
-       WARN_ON(vm->use_cpu_for_update && shadow);
-       if (vm->use_cpu_for_update && !shadow) {
-               r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
-               if (r)
-                       return r;
-               r = amdgpu_vm_bo_wait(adev, parent->bo);
-               if (unlikely(r)) {
-                       amdgpu_bo_kunmap(parent->bo);
+       if (vm->use_cpu_for_update) {
+               pd_addr = (unsigned long)parent->bo->kptr;
+               r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+               if (unlikely(r))
                        return r;
-               }
+
                params.func = amdgpu_vm_cpu_set_ptes;
        } else {
                if (shadow) {
@@ -1105,7 +1118,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
                pt = amdgpu_bo_gpu_offset(bo);
                pt = amdgpu_gart_get_vm_pde(adev, pt);
-               if (parent->entries[pt_idx].addr == pt)
+               if (parent->entries[pt_idx].addr == pt ||
+                   parent->entries[pt_idx].huge_page)
                        continue;
 
                parent->entries[pt_idx].addr = pt;
@@ -1146,28 +1160,29 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
                            count, incr, AMDGPU_PTE_VALID);
        }
 
-       if (params.func == amdgpu_vm_cpu_set_ptes)
-               amdgpu_bo_kunmap(parent->bo);
-       else if (params.ib->length_dw == 0) {
-               amdgpu_job_free(job);
-       } else {
-               amdgpu_ring_pad_ib(ring, params.ib);
-               amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
-                                AMDGPU_FENCE_OWNER_VM);
-               if (shadow)
-                       amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+       if (!vm->use_cpu_for_update) {
+               if (params.ib->length_dw == 0) {
+                       amdgpu_job_free(job);
+               } else {
+                       amdgpu_ring_pad_ib(ring, params.ib);
+                       amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
                                         AMDGPU_FENCE_OWNER_VM);
+                       if (shadow)
+                               amdgpu_sync_resv(adev, &job->sync,
+                                                shadow->tbo.resv,
+                                                AMDGPU_FENCE_OWNER_VM);
+
+                       WARN_ON(params.ib->length_dw > ndw);
+                       r = amdgpu_job_submit(job, ring, &vm->entity,
+                                       AMDGPU_FENCE_OWNER_VM, &fence);
+                       if (r)
+                               goto error_free;
 
-               WARN_ON(params.ib->length_dw > ndw);
-               r = amdgpu_job_submit(job, ring, &vm->entity,
-                               AMDGPU_FENCE_OWNER_VM, &fence);
-               if (r)
-                       goto error_free;
-
-               amdgpu_bo_fence(parent->bo, fence, true);
-               dma_fence_put(vm->last_dir_update);
-               vm->last_dir_update = dma_fence_get(fence);
-               dma_fence_put(fence);
+                       amdgpu_bo_fence(parent->bo, fence, true);
+                       dma_fence_put(vm->last_dir_update);
+                       vm->last_dir_update = dma_fence_get(fence);
+                       dma_fence_put(fence);
+               }
        }
        /*
         * Recurse into the subdirectories. This recursion is harmless because
@@ -1235,33 +1250,105 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
        if (r)
                amdgpu_vm_invalidate_level(&vm->root);
 
+       if (vm->use_cpu_for_update) {
+               /* Flush HDP */
+               mb();
+               amdgpu_gart_flush_gpu_tlb(adev, 0);
+       }
+
        return r;
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_find_entry - find the entry for an address
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @entry: resulting entry or NULL
+ * @parent: parent entry
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Find the vm_pt entry and its parent for the given address.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-                                         uint64_t addr)
+void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
+                        struct amdgpu_vm_pt **entry,
+                        struct amdgpu_vm_pt **parent)
 {
-       struct amdgpu_vm_pt *entry = &p->vm->root;
        unsigned idx, level = p->adev->vm_manager.num_level;
 
-       while (entry->entries) {
+       *parent = NULL;
+       *entry = &p->vm->root;
+       while ((*entry)->entries) {
                idx = addr >> (p->adev->vm_manager.block_size * level--);
-               idx %= amdgpu_bo_size(entry->bo) / 8;
-               entry = &entry->entries[idx];
+               idx %= amdgpu_bo_size((*entry)->bo) / 8;
+               *parent = *entry;
+               *entry = &(*entry)->entries[idx];
        }
 
        if (level)
-               return NULL;
+               *entry = NULL;
+}
+
+/**
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @entry: vm_pt entry to check
+ * @parent: parent entry
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags for the PTEs
+ *
+ * Check if we can update the PD with a huge page.
+ */
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+                                      struct amdgpu_vm_pt *entry,
+                                      struct amdgpu_vm_pt *parent,
+                                      unsigned nptes, uint64_t dst,
+                                      uint64_t flags)
+{
+       bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
+       uint64_t pd_addr, pde;
+       int r;
 
-       return entry->bo;
+       /* In the case of a mixed PT the PDE must point to it */
+       if (p->adev->asic_type < CHIP_VEGA10 ||
+           nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+           p->func == amdgpu_vm_do_copy_ptes ||
+           !(flags & AMDGPU_PTE_VALID)) {
+
+               dst = amdgpu_bo_gpu_offset(entry->bo);
+               dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+               flags = AMDGPU_PTE_VALID;
+       } else {
+               flags |= AMDGPU_PDE_PTE;
+       }
+
+       if (entry->addr == dst &&
+           entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+               return 0;
+
+       entry->addr = dst;
+       entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+       if (use_cpu_update) {
+               r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
+               if (r)
+                       return r;
+
+               pde = pd_addr + (entry - parent->entries) * 8;
+               amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+       } else {
+               if (parent->bo->shadow) {
+                       pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+                       pde = pd_addr + (entry - parent->entries) * 8;
+                       amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+               }
+               pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+               pde = pd_addr + (entry - parent->entries) * 8;
+               amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+       }
+
+       return 0;
 }
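
Whether a huge page is mapped or not, the PDE that gets (re)written lives at an 8-byte slot indexed by the entry's position within its parent, and the huge-page case tags the value with the new PDE-is-PTE bit. A userspace sketch of that slot and flag arithmetic with invented addresses:

#include <stdint.h>
#include <stdio.h>

#define PTE_VALID (1ULL << 0)
#define PDE_PTE   (1ULL << 54) /* AMDGPU_PDE_PTE */

int main(void)
{
        uint64_t pd_addr = 0x200000; /* hypothetical parent PD address */
        unsigned idx = 42;           /* entry - parent->entries */
        uint64_t pde = pd_addr + idx * 8;
        uint64_t dst = 0x12345000ULL | PTE_VALID | PDE_PTE;

        printf("write %#llx to PDE at %#llx\n",
               (unsigned long long)dst, (unsigned long long)pde);
        return 0;
}
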
 
 /**
@@ -1287,49 +1374,47 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
        uint64_t addr, pe_start;
        struct amdgpu_bo *pt;
        unsigned nptes;
-       int r;
        bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
-
+       int r;
 
        /* walk over the address space and update the page tables */
-       for (addr = start; addr < end; addr += nptes) {
-               pt = amdgpu_vm_get_pt(params, addr);
-               if (!pt) {
-                       pr_err("PT not found, aborting update_ptes\n");
-                       return -EINVAL;
-               }
+       for (addr = start; addr < end; addr += nptes,
+            dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
+               struct amdgpu_vm_pt *entry, *parent;
 
-               if (params->shadow) {
-                       if (WARN_ONCE(use_cpu_update,
-                               "CPU VM update doesn't suuport shadow pages"))
-                               return 0;
-
-                       if (!pt->shadow)
-                               return 0;
-                       pt = pt->shadow;
-               }
+               amdgpu_vm_get_entry(params, addr, &entry, &parent);
+               if (!entry)
+                       return -ENOENT;
 
                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                else
                        nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
+               r = amdgpu_vm_handle_huge_pages(params, entry, parent,
+                                               nptes, dst, flags);
+               if (r)
+                       return r;
+
+               if (entry->huge_page)
+                       continue;
+
+               pt = entry->bo;
                if (use_cpu_update) {
-                       r = amdgpu_bo_kmap(pt, (void *)&pe_start);
-                       if (r)
-                               return r;
-               } else
+                       pe_start = (unsigned long)pt->kptr;
+               } else {
+                       if (pt->shadow) {
+                               pe_start = amdgpu_bo_gpu_offset(pt->shadow);
+                               pe_start += (addr & mask) * 8;
+                               params->func(params, pe_start, dst, nptes,
+                                            AMDGPU_GPU_PAGE_SIZE, flags);
+                       }
                        pe_start = amdgpu_bo_gpu_offset(pt);
+               }
 
                pe_start += (addr & mask) * 8;
-
                params->func(params, pe_start, dst, nptes,
                             AMDGPU_GPU_PAGE_SIZE, flags);
-
-               dst += nptes * AMDGPU_GPU_PAGE_SIZE;
-
-               if (use_cpu_update)
-                       amdgpu_bo_kunmap(pt);
        }
 
        return 0;
@@ -1372,8 +1457,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params    *params,
         */
 
        /* SI and newer are optimized for 64KB */
-       uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-       uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+       unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
+       uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
+       uint64_t frag_align = 1 << pages_per_frag;
 
        uint64_t frag_start = ALIGN(start, frag_align);
        uint64_t frag_end = end & ~(frag_align - 1);
@@ -1445,6 +1531,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
        params.vm = vm;
        params.src = src;
 
+       /* sync to everything on unmapping */
+       if (!(flags & AMDGPU_PTE_VALID))
+               owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
        if (vm->use_cpu_for_update) {
                /* params.src is used as a flag to indicate system memory */
                if (pages_addr)
@@ -1453,23 +1543,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                /* Wait for PT BOs to be free. PTs share the same resv. object
                 * as the root PD BO
                 */
-               r = amdgpu_vm_bo_wait(adev, vm->root.bo);
+               r = amdgpu_vm_wait_pd(adev, vm, owner);
                if (unlikely(r))
                        return r;
 
                params.func = amdgpu_vm_cpu_set_ptes;
                params.pages_addr = pages_addr;
-               params.shadow = false;
                return amdgpu_vm_frag_ptes(&params, start, last + 1,
                                           addr, flags);
        }
 
        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
-       /* sync to everything on unmapping */
-       if (!(flags & AMDGPU_PTE_VALID))
-               owner = AMDGPU_FENCE_OWNER_UNDEFINED;
-
        nptes = last - start + 1;
 
        /*
@@ -1481,6 +1566,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
        /* padding, etc. */
        ndw = 64;
 
+       /* one PDE write for each huge page */
+       ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
+
        if (src) {
                /* only copy commands needed */
                ndw += ncmds * 7;
@@ -1542,11 +1630,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
        if (r)
                goto error_free;
 
-       params.shadow = true;
-       r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
-       if (r)
-               goto error_free;
-       params.shadow = false;
        r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
        if (r)
                goto error_free;
@@ -1565,6 +1648,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
        amdgpu_job_free(job);
+       amdgpu_vm_invalidate_level(&vm->root);
        return r;
 }
 
@@ -1752,6 +1836,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                list_add(&bo_va->vm_status, &vm->cleared);
        spin_unlock(&vm->status_lock);
 
+       if (vm->use_cpu_for_update) {
+               /* Flush HDP */
+               mb();
+               amdgpu_gart_flush_gpu_tlb(adev, 0);
+       }
+
        return 0;
 }
 
@@ -2457,6 +2547,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                goto error_free_root;
 
        vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+
+       if (vm->use_cpu_for_update) {
+               r = amdgpu_bo_kmap(vm->root.bo, NULL);
+               if (r)
+                       goto error_free_root;
+       }
+
        amdgpu_bo_unreserve(vm->root.bo);
 
        return 0;
index 936f158bc5ec651fae06acc5e9a62a0ce0763355..34d9174ebff22a540f25cf517465fc761a43f4a1 100644 (file)
@@ -51,7 +51,9 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
 
 /* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
+       ((adev)->asic_type < CHIP_VEGA10 ? 4 : \
+        (adev)->vm_manager.block_size)
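
In bytes, the change is substantial: pre-Vega10 ASICs keep the fixed 2^4-page (64 KiB) fragment, while on Vega10 the fragment grows to a whole page-table block. A sketch assuming 4 KiB pages and a block_size of 9:

#include <stdio.h>

int main(void)
{
        unsigned pre_vega10 = 4, vega10_block_size = 9;

        printf("pre-Vega10: %u KiB\n", (1u << pre_vega10) * 4);        /* 64 */
        printf("Vega10:     %u KiB\n", (1u << vega10_block_size) * 4); /* 2048 */
        return 0;
}
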
 
 #define AMDGPU_PTE_VALID       (1ULL << 0)
 #define AMDGPU_PTE_SYSTEM      (1ULL << 1)
@@ -68,6 +70,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs  */
 #define AMDGPU_PTE_PRT         (1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE         (1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK  AMDGPU_PTE_MTYPE(3ULL)
@@ -98,6 +103,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
        struct amdgpu_bo        *bo;
        uint64_t                addr;
+       bool                    huge_page;
 
        /* array of page tables, one for each directory entry */
        struct amdgpu_vm_pt     *entries;
@@ -222,7 +228,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
                      struct amdgpu_sync *sync, struct dma_fence *fence,
                      struct amdgpu_job *job);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
                        unsigned vmid);
 void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
index 37a499ab30eb02fc0506727b8004c71dc640db58..567c4a5cf90cc2ce927c151c2665d4d2ab5a8395 100644 (file)
@@ -1824,21 +1824,14 @@ static int cik_common_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       amdgpu_amdkfd_suspend(adev);
-
        return cik_common_hw_fini(adev);
 }
 
 static int cik_common_resume(void *handle)
 {
-       int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       r = cik_common_hw_init(adev);
-       if (r)
-               return r;
-
-       return amdgpu_amdkfd_resume(adev);
+       return cik_common_hw_init(adev);
 }
 
 static bool cik_common_is_idle(void *handle)
index c216e16826c99df35e1ab22c9c14a97144fff9ad..f508f4d01e4a9000f633c85e290964098e8c1b86 100644 (file)
@@ -341,6 +341,63 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
        /* XXX todo */
 }
 
+/**
+ * cik_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (CIK).
+ */
+static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+       u32 f32_cntl, phase_quantum = 0;
+       int i;
+
+       if (amdgpu_sdma_phase_quantum) {
+               unsigned value = amdgpu_sdma_phase_quantum;
+               unsigned unit = 0;
+
+               while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                               SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+                       value = (value + 1) >> 1;
+                       unit++;
+               }
+               if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                           SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+                       value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                                SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+                       unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                               SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+                       WARN_ONCE(1,
+                       "clamping sdma_phase_quantum to %uK clock cycles\n",
+                                 value << unit);
+               }
+               phase_quantum =
+                       value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+                       unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+       }
+
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+               if (enable) {
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 1);
+                       if (amdgpu_sdma_phase_quantum) {
+                               WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                               WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                       }
+               } else {
+                       f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+                                       AUTO_CTXSW_ENABLE, 0);
+               }
+
+               WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+       }
+}
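
The quantum reduction loop above halves the requested value until it fits the register's VALUE field, compensating with the power-of-two UNIT field. A userspace sketch of that encoding; the 13-bit VALUE and 4-bit UNIT widths here are assumptions for illustration, the real widths come from the SDMA0_PHASE0_QUANTUM masks used above:

#include <stdio.h>

#define VALUE_MAX 0x1FFF /* assumed VALUE field width */
#define UNIT_MAX  0xF    /* assumed UNIT field width */

int main(void)
{
        unsigned value = 300000, unit = 0; /* amdgpu_sdma_phase_quantum */

        while (value > VALUE_MAX) {
                value = (value + 1) >> 1;
                unit++;
        }
        if (unit > UNIT_MAX) {
                value = VALUE_MAX;
                unit = UNIT_MAX;
        }
        printf("value=%u unit=%u -> ~%u clock cycles\n",
               value, unit, value << unit);
        return 0;
}
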
+
 /**
  * cik_sdma_enable - stop the async dma engines
  *
@@ -537,6 +594,8 @@ static int cik_sdma_start(struct amdgpu_device *adev)
 
        /* halt the engine before programing */
        cik_sdma_enable(adev, false);
+       /* enable sdma ring preemption */
+       cik_ctx_switch_enable(adev, true);
 
        /* start the gfx rings and rlc compute queues */
        r = cik_sdma_gfx_resume(adev);
@@ -984,6 +1043,7 @@ static int cik_sdma_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       cik_ctx_switch_enable(adev, false);
        cik_sdma_enable(adev, false);
 
        return 0;
index 18fd01f3e4b245d7d9f4cf24a97daa20beeaee66..003a131bad474db5d28584735c0e399b1def20d6 100644 (file)
@@ -1,24 +1,25 @@
-
 /*
-***************************************************************************************************
-*
-*  Trade secret of Advanced Micro Devices, Inc.
-*  Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished)
-*
-*  All rights reserved.  This notice is intended as a precaution against inadvertent publication and
-*  does not imply publication or any waiver of confidentiality.  The year included in the foregoing
-*  notice is the year of creation of the work.
-*
-***************************************************************************************************
-*/
-/**
-***************************************************************************************************
-* @brief gfx9 Clearstate Definitions
-***************************************************************************************************
-*
-*   Do not edit! This is a machine-generated file!
-*
-*/
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
 
 static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
 {
index aff1f48c947eb7e843895f2509fc5c597304ade1..4b6e2f7bfec9feae37991ba7c6e46712bdecbe5c 100644 (file)
@@ -484,134 +484,6 @@ static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev)
        return true;
 }
 
-static void dce_v10_0_stop_mc_access(struct amdgpu_device *adev,
-                                    struct amdgpu_mode_mc_save *save)
-{
-       u32 crtc_enabled, tmp;
-       int i;
-
-       save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
-       save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL);
-
-       /* disable VGA render */
-       tmp = RREG32(mmVGA_RENDER_CONTROL);
-       tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
-       WREG32(mmVGA_RENDER_CONTROL, tmp);
-
-       /* blank the display controllers */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
-                                            CRTC_CONTROL, CRTC_MASTER_EN);
-               if (crtc_enabled) {
-#if 0
-                       u32 frame_count;
-                       int j;
-
-                       save->crtc_enabled[i] = true;
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-                               amdgpu_display_vblank_wait(adev, i);
-                               WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                               tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
-                               WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-                               WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       }
-                       /* wait for the next frame */
-                       frame_count = amdgpu_display_vblank_get_counter(adev, i);
-                       for (j = 0; j < adev->usec_timeout; j++) {
-                               if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-                                       break;
-                               udelay(1);
-                       }
-                       tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) {
-                               tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-                               WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-                       }
-                       tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) {
-                               tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1);
-                               WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-                       }
-#else
-                       /* XXX this is a hack to avoid strange behavior with EFI on certain systems */
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                       tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
-                       WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       save->crtc_enabled[i] = false;
-                       /* ***** */
-#endif
-               } else {
-                       save->crtc_enabled[i] = false;
-               }
-       }
-}
-
-static void dce_v10_0_resume_mc_access(struct amdgpu_device *adev,
-                                      struct amdgpu_mode_mc_save *save)
-{
-       u32 tmp, frame_count;
-       int i, j;
-
-       /* update crtc base addresses */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-               WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-
-               if (save->crtc_enabled[i]) {
-                       tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 0) {
-                               tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 0);
-                               WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-                       }
-                       tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) {
-                               tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-                               WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-                       }
-                       tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) {
-                               tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0);
-                               WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-                       }
-                       for (j = 0; j < adev->usec_timeout; j++) {
-                               tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-                               if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0)
-                                       break;
-                               udelay(1);
-                       }
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                       WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       /* wait for the next frame */
-                       frame_count = amdgpu_display_vblank_get_counter(adev, i);
-                       for (j = 0; j < adev->usec_timeout; j++) {
-                               if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-                                       break;
-                               udelay(1);
-                       }
-               }
-       }
-
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start));
-
-       /* Unlock vga access */
-       WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control);
-       mdelay(1);
-       WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control);
-}
-
 static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev,
                                           bool render)
 {
@@ -3025,6 +2897,8 @@ static int dce_v10_0_hw_init(void *handle)
 
        dce_v10_0_init_golden_registers(adev);
 
+       /* disable vga render */
+       dce_v10_0_set_vga_render_state(adev, false);
        /* init dig PHYs, disp eng pll */
        amdgpu_atombios_encoder_init_dig(adev);
        amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
@@ -3737,7 +3611,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
 }
 
 static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
-       .set_vga_render_state = &dce_v10_0_set_vga_render_state,
        .bandwidth_update = &dce_v10_0_bandwidth_update,
        .vblank_get_counter = &dce_v10_0_vblank_get_counter,
        .vblank_wait = &dce_v10_0_vblank_wait,
@@ -3750,8 +3623,6 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
        .page_flip_get_scanoutpos = &dce_v10_0_crtc_get_scanoutpos,
        .add_encoder = &dce_v10_0_encoder_add,
        .add_connector = &amdgpu_connector_add,
-       .stop_mc_access = &dce_v10_0_stop_mc_access,
-       .resume_mc_access = &dce_v10_0_resume_mc_access,
 };
 
 static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
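
The dce_v10_0 changes above, and the matching dce_v11/v6/v8 hunks below, delete the stop_mc_access/resume_mc_access callbacks outright: with the MC no longer reprogrammed by the driver, hw_init only needs to disable VGA render once. The read-modify-write idiom these functions lean on is the REG_GET_FIELD()/REG_SET_FIELD() macro pair; a rough stand-in is sketched here, with an invented mask and shift for the field (in the driver these constants come from generated register headers):

#include <stdint.h>
#include <stdio.h>

/* assumed values purely for illustration */
#define VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK   0x00030000u
#define VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL__SHIFT 16u

#define REG_GET_FIELD(v, reg, field) \
        (((v) & reg##__##field##_MASK) >> reg##__##field##__SHIFT)
#define REG_SET_FIELD(v, reg, field, val) \
        (((v) & ~reg##__##field##_MASK) | \
         (((uint32_t)(val) << reg##__##field##__SHIFT) & reg##__##field##_MASK))

int main(void)
{
        uint32_t tmp = 0x00030000;  /* pretend RREG32(mmVGA_RENDER_CONTROL) */

        /* clear the field without disturbing the rest of the register */
        tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
        printf("field %u, reg 0x%08x\n",
               (unsigned int)REG_GET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL),
               (unsigned int)tmp);
        return 0;
}
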
index 2df650dfa727ed7b2a3465ceb5c74c38727e6d0a..6af489872ffd98e3ab5a21030bb975848bc68d0b 100644 (file)
@@ -499,79 +499,6 @@ static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
        return true;
 }
 
-static void dce_v11_0_stop_mc_access(struct amdgpu_device *adev,
-                                    struct amdgpu_mode_mc_save *save)
-{
-       u32 crtc_enabled, tmp;
-       int i;
-
-       save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
-       save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL);
-
-       /* disable VGA render */
-       tmp = RREG32(mmVGA_RENDER_CONTROL);
-       tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
-       WREG32(mmVGA_RENDER_CONTROL, tmp);
-
-       /* blank the display controllers */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
-                                            CRTC_CONTROL, CRTC_MASTER_EN);
-               if (crtc_enabled) {
-#if 1
-                       save->crtc_enabled[i] = true;
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-                               /*it is correct only for RGB ; black is 0*/
-                               WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
-                               tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
-                               WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-                       }
-#else
-                       /* XXX this is a hack to avoid strange behavior with EFI on certain systems */
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                       tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
-                       WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       save->crtc_enabled[i] = false;
-                       /* ***** */
-#endif
-               } else {
-                       save->crtc_enabled[i] = false;
-               }
-       }
-}
-
-static void dce_v11_0_resume_mc_access(struct amdgpu_device *adev,
-                                      struct amdgpu_mode_mc_save *save)
-{
-       u32 tmp;
-       int i;
-
-       /* update crtc base addresses */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-
-               if (save->crtc_enabled[i]) {
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-                       WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-               }
-       }
-
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start));
-
-       /* Unlock vga access */
-       WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control);
-       mdelay(1);
-       WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control);
-}
-
 static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
                                           bool render)
 {
@@ -3086,6 +3013,8 @@ static int dce_v11_0_hw_init(void *handle)
 
        dce_v11_0_init_golden_registers(adev);
 
+       /* disable vga render */
+       dce_v11_0_set_vga_render_state(adev, false);
        /* init dig PHYs, disp eng pll */
        amdgpu_atombios_crtc_powergate_init(adev);
        amdgpu_atombios_encoder_init_dig(adev);
@@ -3806,7 +3735,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
 }
 
 static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
-       .set_vga_render_state = &dce_v11_0_set_vga_render_state,
        .bandwidth_update = &dce_v11_0_bandwidth_update,
        .vblank_get_counter = &dce_v11_0_vblank_get_counter,
        .vblank_wait = &dce_v11_0_vblank_wait,
@@ -3819,8 +3747,6 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
        .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
        .add_encoder = &dce_v11_0_encoder_add,
        .add_connector = &amdgpu_connector_add,
-       .stop_mc_access = &dce_v11_0_stop_mc_access,
-       .resume_mc_access = &dce_v11_0_resume_mc_access,
 };
 
 static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
index 0c3891fa62f1786ca21b8e37e766b56be0b319f4..126c5e4e7733dcf73f8ea6107c3f990ca8e7b50a 100644 (file)
@@ -392,117 +392,6 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
        return mmDC_GPIO_HPD_A;
 }
 
-static u32 evergreen_get_vblank_counter(struct amdgpu_device* adev, int crtc)
-{
-       if (crtc >= adev->mode_info.num_crtc)
-               return 0;
-       else
-               return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
-}
-
-static void dce_v6_0_stop_mc_access(struct amdgpu_device *adev,
-                                   struct amdgpu_mode_mc_save *save)
-{
-       u32 crtc_enabled, tmp, frame_count;
-       int i, j;
-
-       save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
-       save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL);
-
-       /* disable VGA render */
-       WREG32(mmVGA_RENDER_CONTROL, 0);
-
-       /* blank the display controllers */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               crtc_enabled = RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK;
-               if (crtc_enabled) {
-                       save->crtc_enabled[i] = true;
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-
-                       if (!(tmp & CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK)) {
-                               dce_v6_0_vblank_wait(adev, i);
-                               WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                               tmp |= CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK;
-                               WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-                               WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       }
-                       /* wait for the next frame */
-                       frame_count = evergreen_get_vblank_counter(adev, i);
-                       for (j = 0; j < adev->usec_timeout; j++) {
-                               if (evergreen_get_vblank_counter(adev, i) != frame_count)
-                                       break;
-                               udelay(1);
-                       }
-
-                       /* XXX this is a hack to avoid strange behavior with EFI on certain systems */
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                       tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
-                       tmp &= ~CRTC_CONTROL__CRTC_MASTER_EN_MASK;
-                       WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       save->crtc_enabled[i] = false;
-                       /* ***** */
-               } else {
-                       save->crtc_enabled[i] = false;
-               }
-       }
-}
-
-static void dce_v6_0_resume_mc_access(struct amdgpu_device *adev,
-                                     struct amdgpu_mode_mc_save *save)
-{
-       u32 tmp;
-       int i, j;
-
-       /* update crtc base addresses */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-               WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-       }
-
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS, (u32)adev->mc.vram_start);
-
-       /* unlock regs and wait for update */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               if (save->crtc_enabled[i]) {
-                       tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-                       if ((tmp & 0x7) != 0) {
-                               tmp &= ~0x7;
-                               WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-                       }
-                       tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-                       if (tmp & GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK) {
-                               tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
-                               WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-                       }
-                       tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-                       if (tmp & 1) {
-                               tmp &= ~1;
-                               WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-                       }
-                       for (j = 0; j < adev->usec_timeout; j++) {
-                               tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-                               if ((tmp & GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK) == 0)
-                                       break;
-                               udelay(1);
-                       }
-               }
-       }
-
-       /* Unlock vga access */
-       WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control);
-       mdelay(1);
-       WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control);
-
-}
-
 static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
                                          bool render)
 {
@@ -2873,6 +2762,8 @@ static int dce_v6_0_hw_init(void *handle)
        int i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       /* disable vga render */
+       dce_v6_0_set_vga_render_state(adev, false);
        /* init dig PHYs, disp eng pll */
        amdgpu_atombios_encoder_init_dig(adev);
        amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
@@ -3525,7 +3416,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
 }
 
 static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
-       .set_vga_render_state = &dce_v6_0_set_vga_render_state,
        .bandwidth_update = &dce_v6_0_bandwidth_update,
        .vblank_get_counter = &dce_v6_0_vblank_get_counter,
        .vblank_wait = &dce_v6_0_vblank_wait,
@@ -3538,8 +3428,6 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
        .page_flip_get_scanoutpos = &dce_v6_0_crtc_get_scanoutpos,
        .add_encoder = &dce_v6_0_encoder_add,
        .add_connector = &amdgpu_connector_add,
-       .stop_mc_access = &dce_v6_0_stop_mc_access,
-       .resume_mc_access = &dce_v6_0_resume_mc_access,
 };
 
 static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
index c164bef8284688fb52b5f0b8f79d1399367c3969..c0740adee46fe1ef3040621b5cbcce4808dcde85 100644 (file)
@@ -419,81 +419,6 @@ static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev)
        return true;
 }
 
-static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
-                                   struct amdgpu_mode_mc_save *save)
-{
-       u32 crtc_enabled, tmp;
-       int i;
-
-       save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
-       save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL);
-
-       /* disable VGA render */
-       tmp = RREG32(mmVGA_RENDER_CONTROL);
-       tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
-       WREG32(mmVGA_RENDER_CONTROL, tmp);
-
-       /* blank the display controllers */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
-                                            CRTC_CONTROL, CRTC_MASTER_EN);
-               if (crtc_enabled) {
-#if 1
-                       save->crtc_enabled[i] = true;
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-                               /*it is correct only for RGB ; black is 0*/
-                               WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
-                               tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
-                               WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-                       }
-                       mdelay(20);
-#else
-                       /* XXX this is a hack to avoid strange behavior with EFI on certain systems */
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
-                       tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
-                       WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
-                       WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-                       save->crtc_enabled[i] = false;
-                       /* ***** */
-#endif
-               } else {
-                       save->crtc_enabled[i] = false;
-               }
-       }
-}
-
-static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
-                                     struct amdgpu_mode_mc_save *save)
-{
-       u32 tmp;
-       int i;
-
-       /* update crtc base addresses */
-       for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-                      upper_32_bits(adev->mc.vram_start));
-               WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
-                      (u32)adev->mc.vram_start);
-
-               if (save->crtc_enabled[i]) {
-                       tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
-                       tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-                       WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-               }
-               mdelay(20);
-       }
-
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
-       WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start));
-
-       /* Unlock vga access */
-       WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control);
-       mdelay(1);
-       WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control);
-}
-
 static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev,
                                          bool render)
 {
@@ -2870,6 +2795,8 @@ static int dce_v8_0_hw_init(void *handle)
        int i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       /* disable vga render */
+       dce_v8_0_set_vga_render_state(adev, false);
        /* init dig PHYs, disp eng pll */
        amdgpu_atombios_encoder_init_dig(adev);
        amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
@@ -3574,7 +3501,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
 }
 
 static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
-       .set_vga_render_state = &dce_v8_0_set_vga_render_state,
        .bandwidth_update = &dce_v8_0_bandwidth_update,
        .vblank_get_counter = &dce_v8_0_vblank_get_counter,
        .vblank_wait = &dce_v8_0_vblank_wait,
@@ -3587,8 +3513,6 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
        .page_flip_get_scanoutpos = &dce_v8_0_crtc_get_scanoutpos,
        .add_encoder = &dce_v8_0_encoder_add,
        .add_connector = &amdgpu_connector_add,
-       .stop_mc_access = &dce_v8_0_stop_mc_access,
-       .resume_mc_access = &dce_v8_0_resume_mc_access,
 };
 
 static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
index 90bb08309a533cd2bad92de91ac9b3cc3bc3db7f..0d2f060206dce0d87c8c2659840d2392c1d5b99b 100644 (file)
@@ -95,62 +95,6 @@ static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev)
        return 0;
 }
 
-static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
-                             struct amdgpu_mode_mc_save *save)
-{
-       switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
-       case CHIP_TAHITI:
-       case CHIP_PITCAIRN:
-       case CHIP_VERDE:
-       case CHIP_OLAND:
-               dce_v6_0_disable_dce(adev);
-               break;
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
-       case CHIP_BONAIRE:
-       case CHIP_HAWAII:
-       case CHIP_KAVERI:
-       case CHIP_KABINI:
-       case CHIP_MULLINS:
-               dce_v8_0_disable_dce(adev);
-               break;
-#endif
-       case CHIP_FIJI:
-       case CHIP_TONGA:
-               dce_v10_0_disable_dce(adev);
-               break;
-       case CHIP_CARRIZO:
-       case CHIP_STONEY:
-       case CHIP_POLARIS10:
-       case CHIP_POLARIS11:
-       case CHIP_POLARIS12:
-               dce_v11_0_disable_dce(adev);
-               break;
-       case CHIP_TOPAZ:
-#ifdef CONFIG_DRM_AMDGPU_SI
-       case CHIP_HAINAN:
-#endif
-               /* no DCE */
-               return;
-       default:
-               DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
-       }
-
-       return;
-}
-static void dce_virtual_resume_mc_access(struct amdgpu_device *adev,
-                               struct amdgpu_mode_mc_save *save)
-{
-       return;
-}
-
-static void dce_virtual_set_vga_render_state(struct amdgpu_device *adev,
-                                   bool render)
-{
-       return;
-}
-
 /**
  * dce_virtual_bandwidth_update - program display watermarks
  *
@@ -522,6 +466,45 @@ static int dce_virtual_sw_fini(void *handle)
 
 static int dce_virtual_hw_init(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+       switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+       case CHIP_TAHITI:
+       case CHIP_PITCAIRN:
+       case CHIP_VERDE:
+       case CHIP_OLAND:
+               dce_v6_0_disable_dce(adev);
+               break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+       case CHIP_BONAIRE:
+       case CHIP_HAWAII:
+       case CHIP_KAVERI:
+       case CHIP_KABINI:
+       case CHIP_MULLINS:
+               dce_v8_0_disable_dce(adev);
+               break;
+#endif
+       case CHIP_FIJI:
+       case CHIP_TONGA:
+               dce_v10_0_disable_dce(adev);
+               break;
+       case CHIP_CARRIZO:
+       case CHIP_STONEY:
+       case CHIP_POLARIS11:
+       case CHIP_POLARIS10:
+               dce_v11_0_disable_dce(adev);
+               break;
+       case CHIP_TOPAZ:
+#ifdef CONFIG_DRM_AMDGPU_SI
+       case CHIP_HAINAN:
+#endif
+               /* no DCE */
+               break;
+       default:
+               DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
+       }
        return 0;
 }
 
@@ -677,7 +660,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
 }
 
 static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
-       .set_vga_render_state = &dce_virtual_set_vga_render_state,
        .bandwidth_update = &dce_virtual_bandwidth_update,
        .vblank_get_counter = &dce_virtual_vblank_get_counter,
        .vblank_wait = &dce_virtual_vblank_wait,
@@ -690,8 +672,6 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
        .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos,
        .add_encoder = NULL,
        .add_connector = NULL,
-       .stop_mc_access = &dce_virtual_stop_mc_access,
-       .resume_mc_access = &dce_virtual_resume_mc_access,
 };
 
 static void dce_virtual_set_display_funcs(struct amdgpu_device *adev)
@@ -809,7 +789,7 @@ static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = {
 
 static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev)
 {
-       adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST;
+       adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1;
        adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs;
 }
 
index 5173ca1fd159d19971ace8cf87bafb8749a75bbf..4ac85f47f28728b501b3dffa7cc88ad007d93cb2 100644 (file)
@@ -1573,7 +1573,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
 
 static void gfx_v6_0_scratch_init(struct amdgpu_device *adev)
 {
-       adev->gfx.scratch.num_reg = 7;
+       adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 }
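
The scratch.num_reg bump from 7 to 8, repeated for gfx v7/v8/v9 below, matches the eight SCRATCH_REG0..SCRATCH_REG7 registers; the old count left the last register permanently outside free_mask. A sketch of the bitmask bookkeeping, loosely modeled on the driver's scratch get/free helpers (their exact signatures are not shown in this diff):

#include <stdio.h>

static unsigned int free_mask = (1u << 8) - 1;   /* all 8 slots free: 0xff */

static int scratch_get(unsigned int *idx)
{
        for (unsigned int i = 0; i < 8; i++) {
                if (free_mask & (1u << i)) {
                        free_mask &= ~(1u << i);  /* mark slot in use */
                        *idx = i;
                        return 0;
                }
        }
        return -1;                                /* no scratch register free */
}

static void scratch_free(unsigned int idx)
{
        free_mask |= 1u << idx;                   /* return slot to the pool */
}

int main(void)
{
        unsigned int idx;

        if (scratch_get(&idx) == 0) {
                printf("allocated SCRATCH_REG%u\n", idx);
                scratch_free(idx);
        }
        return 0;
}
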
index 37b45e4403d175229855c167218ceeeaa627180b..17b7c6934b0a389e1955c292cec66e5939d8ca08 100644 (file)
@@ -2021,7 +2021,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
  */
 static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
 {
-       adev->gfx.scratch.num_reg = 7;
+       adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 }
index aa5a50f5eac817cd2a51a157a3af6a44fa51f587..05436b8730b4195c14ac242546a39270072d0b9d 100644 (file)
@@ -193,8 +193,8 @@ static const u32 tonga_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 };
 
 static const u32 tonga_mgcg_cgcg_init[] =
@@ -303,8 +303,8 @@ static const u32 polaris11_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 };
 
 static const u32 golden_settings_polaris10_a11[] =
@@ -336,8 +336,8 @@ static const u32 polaris10_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 };
 
 static const u32 fiji_golden_common_all[] =
@@ -348,8 +348,8 @@ static const u32 fiji_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 };
@@ -436,8 +436,8 @@ static const u32 iceland_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 };
 
 static const u32 iceland_mgcg_cgcg_init[] =
@@ -532,8 +532,8 @@ static const u32 cz_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 };
 
 static const u32 cz_mgcg_cgcg_init[] =
@@ -637,8 +637,8 @@ static const u32 stoney_golden_common_all[] =
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
-       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+       mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 };
 
 static const u32 stoney_mgcg_cgcg_init[] =
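
Every SPI_RESOURCE_RESERVE_EN_CU update in this file makes the same change: 0x00007FBF/0x00007FAF become 0x00FF7FBF/0x00FF7FAF across Tonga, Polaris, Fiji, Iceland, Carrizo and Stoney. A one-liner confirms exactly which bits the new golden values switch on:

#include <stdio.h>

int main(void)
{
        unsigned int before = 0x00007FBF, after = 0x00FF7FBF;

        /* XOR isolates the changed bits */
        printf("bits added: 0x%08x\n", before ^ after);  /* 0x00ff0000: bits 16-23 */
        return 0;
}
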
@@ -750,7 +750,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 
 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 {
-       adev->gfx.scratch.num_reg = 7;
+       adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 }
@@ -4564,7 +4564,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
-               if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+               if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }
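
The queue_mask guard change just above (and its twin in the gfx v9 hunk below) fixes an off-by-one: the valid bit indices of a 64-bit mask are 0..63, and the old "i > sizeof(queue_mask)*8" test still let i == 64 through to an undefined 64-bit shift. A quick demonstration, assuming queue_mask is a 64-bit integer as the comment about 64 queues implies:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t queue_mask = 0;
        unsigned int i = 64;                 /* first invalid bit index */

        if (i >= sizeof(queue_mask) * 8)     /* new test: rejects 64 */
                printf("rejected i=%u\n", i);
        else
                queue_mask |= 1ULL << i;     /* old test (>) reached this UB */

        return (int)(queue_mask & 1);
}
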
index 3a0b69b09ed62ed9dd18a8b4dbfad595b7d30030..435db6f5efcf0f47dfdced77f7b9c583002e3b8e 100644 (file)
@@ -211,7 +211,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 
 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
 {
-       adev->gfx.scratch.num_reg = 7;
+       adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 }
@@ -1475,21 +1475,23 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
 
 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
 {
-       u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+       u32 data;
 
-       if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
-               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
-               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
-       } else if (se_num == 0xffffffff) {
-               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+       if (instance == 0xffffffff)
+               data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+       else
+               data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
+
+       if (se_num == 0xffffffff)
                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
-       } else if (sh_num == 0xffffffff) {
-               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+       else
                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
-       } else {
+
+       if (sh_num == 0xffffffff)
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
+       else
                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
-               data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
-       }
+
        WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 }
 
@@ -2425,7 +2427,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
-               if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+               if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }
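
The gfx_v9_0_select_se_sh() rewrite above replaces the four-way if/else ladder with an independent broadcast-or-index decision per field, and it finally honors the instance argument instead of always broadcasting it. A compact sketch of that per-field logic; the shift and broadcast-bit positions are invented for illustration, not the GRBM_GFX_INDEX layout:

#include <stdint.h>
#include <stdio.h>

#define BROADCAST 0xffffffffu

static uint32_t field(uint32_t sel, unsigned int idx_shift, unsigned int bcast_bit)
{
        /* 0xffffffff selects broadcast, anything else a specific index */
        return sel == BROADCAST ? 1u << bcast_bit
                                : (sel & 0xffu) << idx_shift;
}

int main(void)
{
        uint32_t data = field(BROADCAST, 0, 30)   /* instance: broadcast */
                      | field(1, 8, 31)           /* se_num:   index 1   */
                      | field(BROADCAST, 16, 29); /* sh_num:   broadcast */

        printf("GRBM_GFX_INDEX <- 0x%08x\n", (unsigned int)data);
        return 0;
}
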
index a42f483767e75510d02d834fdc724c3b83baa6ec..408723ef157c2aa8972ee9091f7bfcfcdb24fcc7 100644 (file)
@@ -58,14 +58,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
        gfxhub_v1_0_init_gart_pt_regs(adev);
 
        WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
-                    (u32)(adev->mc.gtt_start >> 12));
+                    (u32)(adev->mc.gart_start >> 12));
        WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
-                    (u32)(adev->mc.gtt_start >> 44));
+                    (u32)(adev->mc.gart_start >> 44));
 
        WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
-                    (u32)(adev->mc.gtt_end >> 12));
+                    (u32)(adev->mc.gart_end >> 12));
        WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
-                    (u32)(adev->mc.gtt_end >> 44));
+                    (u32)(adev->mc.gart_end >> 44));
 }
 
 static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
        /* Setup L2 cache */
        tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
-       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1);
        /* XXX for emulation, refer to closed source code. */
        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
                            0);
@@ -144,6 +144,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
        WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
 
        tmp = mmVM_L2_CNTL3_DEFAULT;
+       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
+       tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
        WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
 
        tmp = mmVM_L2_CNTL4_DEFAULT;
@@ -206,6 +208,9 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
                tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
                                PAGE_TABLE_BLOCK_SIZE,
                                adev->vm_manager.block_size - 9);
+               /* Send no-retry XNACK on fault to suppress VM fault storm. */
+               tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+                                   RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
                WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
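
Three things change in the gfxhub setup above: L2 fragment processing is switched on, VM_L2_CNTL3 gets explicit BANK_SELECT and L2_CACHE_BIGK_FRAGMENT_SIZE values, and faulting contexts now get a no-retry XNACK to suppress fault storms. Assuming the fragment size is encoded, as on earlier ASICs, as a power-of-two multiple of the 4 KiB base page (an assumption here, not stated in the diff), a value of 9 selects 2 MiB fragments:

#include <stdio.h>

int main(void)
{
        unsigned int frag = 9;                      /* L2_CACHE_BIGK_FRAGMENT_SIZE */
        unsigned long long bytes = 4096ULL << frag; /* 4 KiB << 9 */

        printf("fragment size: %llu KiB\n", bytes >> 10);  /* 2048 KiB = 2 MiB */
        return 0;
}
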
index d0214d942bfc48f2044d6ef26f8671ee5d3b6f39..93c45f26b7c8645d4a8ba9a74f126c6f5f21b8aa 100644 (file)
@@ -66,14 +66,10 @@ static const u32 crtc_offsets[6] =
        SI_CRTC5_REGISTER_OFFSET
 };
 
-static void gmc_v6_0_mc_stop(struct amdgpu_device *adev,
-                            struct amdgpu_mode_mc_save *save)
+static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
 {
        u32 blackout;
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_stop_mc_access(adev, save);
-
        gmc_v6_0_wait_for_idle((void *)adev);
 
        blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
@@ -90,8 +86,7 @@ static void gmc_v6_0_mc_stop(struct amdgpu_device *adev,
 
 }
 
-static void gmc_v6_0_mc_resume(struct amdgpu_device *adev,
-                              struct amdgpu_mode_mc_save *save)
+static void gmc_v6_0_mc_resume(struct amdgpu_device *adev)
 {
        u32 tmp;
 
@@ -103,10 +98,6 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev,
        tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
        tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
        WREG32(mmBIF_FB_EN, tmp);
-
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_resume_mc_access(adev, save);
-
 }
 
 static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
@@ -228,20 +219,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
 static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
                                       struct amdgpu_mc *mc)
 {
+       u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+       base <<= 24;
+
        if (mc->mc_vram_size > 0xFFC0000000ULL) {
                dev_warn(adev->dev, "limiting VRAM\n");
                mc->real_vram_size = 0xFFC0000000ULL;
                mc->mc_vram_size = 0xFFC0000000ULL;
        }
-       amdgpu_vram_location(adev, &adev->mc, 0);
-       adev->mc.gtt_base_align = 0;
-       amdgpu_gtt_location(adev, mc);
+       amdgpu_vram_location(adev, &adev->mc, base);
+       amdgpu_gart_location(adev, mc);
 }
 
 static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 {
-       struct amdgpu_mode_mc_save save;
-       u32 tmp;
        int i, j;
 
        /* Initialize HDP */
@@ -254,16 +245,23 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
        }
        WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_set_vga_render_state(adev, false);
-
-       gmc_v6_0_mc_stop(adev, &save);
-
        if (gmc_v6_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timedout !\n");
        }
 
-       WREG32(mmVGA_HDP_CONTROL, VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK);
+       if (adev->mode_info.num_crtc) {
+               u32 tmp;
+
+               /* Lock out access through the VGA aperture */
+               tmp = RREG32(mmVGA_HDP_CONTROL);
+               tmp |= VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK;
+               WREG32(mmVGA_HDP_CONTROL, tmp);
+
+               /* disable VGA render */
+               tmp = RREG32(mmVGA_RENDER_CONTROL);
+               tmp &= ~VGA_VSTATUS_CNTL;
+               WREG32(mmVGA_RENDER_CONTROL, tmp);
+       }
        /* Update configuration */
        WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
               adev->mc.vram_start >> 12);
@@ -271,13 +269,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
               adev->mc.vram_end >> 12);
        WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               adev->vram_scratch.gpu_addr >> 12);
-       tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
-       tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
-       WREG32(mmMC_VM_FB_LOCATION, tmp);
-       /* XXX double check these! */
-       WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
-       WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-       WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
        WREG32(mmMC_VM_AGP_BASE, 0);
        WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
@@ -285,7 +276,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
        if (gmc_v6_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timedout !\n");
        }
-       gmc_v6_0_mc_resume(adev, &save);
 }
 
 static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
@@ -342,15 +332,7 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
        adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
        adev->mc.visible_vram_size = adev->mc.aper_size;
 
-       /* unless the user had overridden it, set the gart
-        * size equal to the 1024 or vram, whichever is larger.
-        */
-       if (amdgpu_gart_size == -1)
-               adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-                                       adev->mc.mc_vram_size);
-       else
-               adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
-
+       amdgpu_gart_set_defaults(adev);
        gmc_v6_0_vram_gtt_location(adev, &adev->mc);
 
        return 0;
@@ -511,8 +493,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
               (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
               (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
        /* setup context0 */
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
        WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
        WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(adev->dummy_page.addr >> 12));
@@ -559,7 +541,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 
        gmc_v6_0_gart_flush_gpu_tlb(adev, 0);
        dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(adev->mc.gtt_size >> 20),
+                (unsigned)(adev->mc.gart_size >> 20),
                 (unsigned long long)adev->gart.table_addr);
        adev->gart.ready = true;
        return 0;
@@ -987,7 +969,6 @@ static int gmc_v6_0_wait_for_idle(void *handle)
 static int gmc_v6_0_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct amdgpu_mode_mc_save save;
        u32 srbm_soft_reset = 0;
        u32 tmp = RREG32(mmSRBM_STATUS);
 
@@ -1003,7 +984,7 @@ static int gmc_v6_0_soft_reset(void *handle)
        }
 
        if (srbm_soft_reset) {
-               gmc_v6_0_mc_stop(adev, &save);
+               gmc_v6_0_mc_stop(adev);
                if (gmc_v6_0_wait_for_idle(adev)) {
                        dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
                }
@@ -1023,7 +1004,7 @@ static int gmc_v6_0_soft_reset(void *handle)
 
                udelay(50);
 
-               gmc_v6_0_mc_resume(adev, &save);
+               gmc_v6_0_mc_resume(adev);
                udelay(50);
        }
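
The gmc_v6_0 rework above, mirrored for gmc v7/v8 below, stops saving and restoring display MC access around programming and drops the driver-side MC_VM_FB_LOCATION write; vram_gtt_location() now trusts the base the vbios already programmed, reading it back from the register's low 16 bits, which hold the base in 16 MiB (1 << 24 byte) units. A quick decode of a hypothetical readback value:

#include <stdio.h>

int main(void)
{
        unsigned int reg = 0x003f0010;   /* hypothetical MC_VM_FB_LOCATION */
        unsigned long long base = (unsigned long long)(reg & 0xffff) << 24;

        printf("FB base: 0x%llx (%llu MiB)\n", base, base >> 20);  /* 256 MiB */
        return 0;
}
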
 
index 7e9ea53edf8bfe65ef5667a4a6a8002c9e0ef0cf..4a9e84062874254275af685ebd56a2ff702cad1c 100644 (file)
@@ -37,6 +37,9 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
 #include "amdgpu_atombios.h"
 
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
@@ -76,14 +79,10 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
        }
 }
 
-static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-                            struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
 {
        u32 blackout;
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_stop_mc_access(adev, save);
-
        gmc_v7_0_wait_for_idle((void *)adev);
 
        blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
@@ -99,8 +98,7 @@ static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
        udelay(100);
 }
 
-static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-                              struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_resume(struct amdgpu_device *adev)
 {
        u32 tmp;
 
@@ -112,9 +110,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
        tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
        tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
        WREG32(mmBIF_FB_EN, tmp);
-
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_resume_mc_access(adev, save);
 }
 
 /**
@@ -242,15 +237,17 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
 static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
                                       struct amdgpu_mc *mc)
 {
+       u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+       base <<= 24;
+
        if (mc->mc_vram_size > 0xFFC0000000ULL) {
                /* leave room for at least 1024M GTT */
                dev_warn(adev->dev, "limiting VRAM\n");
                mc->real_vram_size = 0xFFC0000000ULL;
                mc->mc_vram_size = 0xFFC0000000ULL;
        }
-       amdgpu_vram_location(adev, &adev->mc, 0);
-       adev->mc.gtt_base_align = 0;
-       amdgpu_gtt_location(adev, mc);
+       amdgpu_vram_location(adev, &adev->mc, base);
+       amdgpu_gart_location(adev, mc);
 }
 
 /**
@@ -263,7 +260,6 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
  */
 static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 {
-       struct amdgpu_mode_mc_save save;
        u32 tmp;
        int i, j;
 
@@ -277,13 +273,20 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
        }
        WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_set_vga_render_state(adev, false);
-
-       gmc_v7_0_mc_stop(adev, &save);
        if (gmc_v7_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timed out!\n");
        }
+       if (adev->mode_info.num_crtc) {
+               /* Lock out access through the VGA aperture */
+               tmp = RREG32(mmVGA_HDP_CONTROL);
+               tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+               WREG32(mmVGA_HDP_CONTROL, tmp);
+
+               /* disable VGA render */
+               tmp = RREG32(mmVGA_RENDER_CONTROL);
+               tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+               WREG32(mmVGA_RENDER_CONTROL, tmp);
+       }
        /* Update configuration */
        WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
               adev->mc.vram_start >> 12);
@@ -291,20 +294,12 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
               adev->mc.vram_end >> 12);
        WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               adev->vram_scratch.gpu_addr >> 12);
-       tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
-       tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
-       WREG32(mmMC_VM_FB_LOCATION, tmp);
-       /* XXX double check these! */
-       WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
-       WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-       WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
        WREG32(mmMC_VM_AGP_BASE, 0);
        WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
        if (gmc_v7_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timed out!\n");
        }
-       gmc_v7_0_mc_resume(adev, &save);
 
        WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
@@ -391,15 +386,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
        if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
                adev->mc.visible_vram_size = adev->mc.real_vram_size;
 
-       /* unless the user had overridden it, set the gart
-        * size equal to the 1024 or vram, whichever is larger.
-        */
-       if (amdgpu_gart_size == -1)
-               adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-                                       adev->mc.mc_vram_size);
-       else
-               adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
-
+       amdgpu_gart_set_defaults(adev);
        gmc_v7_0_vram_gtt_location(adev, &adev->mc);
 
        return 0;
@@ -611,8 +598,8 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4);
        WREG32(mmVM_L2_CNTL3, tmp);
        /* setup context0 */
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
        WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
        WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(adev->dummy_page.addr >> 12));
@@ -666,7 +653,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 
        gmc_v7_0_gart_flush_gpu_tlb(adev, 0);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(adev->mc.gtt_size >> 20),
+                (unsigned)(adev->mc.gart_size >> 20),
                 (unsigned long long)adev->gart.table_addr);
        adev->gart.ready = true;
        return 0;
@@ -1138,7 +1125,6 @@ static int gmc_v7_0_wait_for_idle(void *handle)
 static int gmc_v7_0_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-       struct amdgpu_mode_mc_save save;
        u32 srbm_soft_reset = 0;
        u32 tmp = RREG32(mmSRBM_STATUS);
 
@@ -1154,7 +1140,7 @@ static int gmc_v7_0_soft_reset(void *handle)
        }
 
        if (srbm_soft_reset) {
-               gmc_v7_0_mc_stop(adev, &save);
+               gmc_v7_0_mc_stop(adev);
                if (gmc_v7_0_wait_for_idle((void *)adev)) {
                        dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
                }
@@ -1175,7 +1161,7 @@ static int gmc_v7_0_soft_reset(void *handle)
                /* Wait a little for things to settle down */
                udelay(50);
 
-               gmc_v7_0_mc_resume(adev, &save);
+               gmc_v7_0_mc_resume(adev);
                udelay(50);
        }
 
index cc9f88057cd5e745cfdcc36d16e716fcc2c168ac..85c937b5e40bdeaf4b869a6233af069fd9b00fbf 100644 (file)
@@ -35,6 +35,9 @@
 #include "oss/oss_3_0_d.h"
 #include "oss/oss_3_0_sh_mask.h"
 
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+
 #include "vid.h"
 #include "vi.h"
 
@@ -161,14 +164,10 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
        }
 }
 
-static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-                            struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
 {
        u32 blackout;
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_stop_mc_access(adev, save);
-
        gmc_v8_0_wait_for_idle(adev);
 
        blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
@@ -184,8 +183,7 @@ static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
        udelay(100);
 }
 
-static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-                              struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_resume(struct amdgpu_device *adev)
 {
        u32 tmp;
 
@@ -197,9 +195,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
        tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
        tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
        WREG32(mmBIF_FB_EN, tmp);
-
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_resume_mc_access(adev, save);
 }
 
 /**
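
With the display-driven MC access save/restore gone, the stop/resume
pair no longer needs an amdgpu_mode_mc_save cookie. Callers now
bracket the reset directly; the call order below is lifted from the
soft-reset hunks in this patch:

	gmc_v8_0_mc_stop(adev);
	/* ... assert SRBM soft reset, give it time to settle ... */
	gmc_v8_0_mc_resume(adev);
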
@@ -404,15 +399,20 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
                                       struct amdgpu_mc *mc)
 {
+       u64 base = 0;
+
+       if (!amdgpu_sriov_vf(adev))
+               base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
+       base <<= 24;
+
        if (mc->mc_vram_size > 0xFFC0000000ULL) {
                /* leave room for at least 1024M GTT */
                dev_warn(adev->dev, "limiting VRAM\n");
                mc->real_vram_size = 0xFFC0000000ULL;
                mc->mc_vram_size = 0xFFC0000000ULL;
        }
-       amdgpu_vram_location(adev, &adev->mc, 0);
-       adev->mc.gtt_base_align = 0;
-       amdgpu_gtt_location(adev, mc);
+       amdgpu_vram_location(adev, &adev->mc, base);
+       amdgpu_gart_location(adev, mc);
 }
 
 /**
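
Under SR-IOV the host programs the frame buffer location, so the guest
reads the base back from MC_VM_FB_LOCATION instead of assuming zero.
The low 16 bits hold the base in 16 MB (1 << 24 byte) units, which is
why the hunk masks and then shifts by 24. A hypothetical helper
spelling out the same arithmetic (the function name is invented for
illustration):

	static u64 fb_base_from_location(u32 fb_location)
	{
		return (u64)(fb_location & 0xFFFF) << 24;	/* 16 MB units */
	}
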
@@ -425,7 +425,6 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
  */
 static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 {
-       struct amdgpu_mode_mc_save save;
        u32 tmp;
        int i, j;
 
@@ -439,13 +438,20 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
        }
        WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-       if (adev->mode_info.num_crtc)
-               amdgpu_display_set_vga_render_state(adev, false);
-
-       gmc_v8_0_mc_stop(adev, &save);
        if (gmc_v8_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timedout !\n");
        }
+       if (adev->mode_info.num_crtc) {
+               /* Lock out access through the VGA aperture */
+               tmp = RREG32(mmVGA_HDP_CONTROL);
+               tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+               WREG32(mmVGA_HDP_CONTROL, tmp);
+
+               /* disable VGA render */
+               tmp = RREG32(mmVGA_RENDER_CONTROL);
+               tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+               WREG32(mmVGA_RENDER_CONTROL, tmp);
+       }
        /* Update configuration */
        WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
               adev->mc.vram_start >> 12);
@@ -453,20 +459,23 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
               adev->mc.vram_end >> 12);
        WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               adev->vram_scratch.gpu_addr >> 12);
-       tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
-       tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
-       WREG32(mmMC_VM_FB_LOCATION, tmp);
-       /* XXX double check these! */
-       WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
-       WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-       WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+
+       if (amdgpu_sriov_vf(adev)) {
+               tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
+               tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
+               WREG32(mmMC_VM_FB_LOCATION, tmp);
+               /* XXX double check these! */
+               WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
+               WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+               WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+       }
+
        WREG32(mmMC_VM_AGP_BASE, 0);
        WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
        if (gmc_v8_0_wait_for_idle((void *)adev)) {
                dev_warn(adev->dev, "Wait for MC idle timedout !\n");
        }
-       gmc_v8_0_mc_resume(adev, &save);
 
        WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
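
The encode side of the same register is now performed only when
running as an SR-IOV VF: bits 31:16 carry the last 16 MB unit of VRAM
and bits 15:0 the first. A hypothetical helper equivalent to the
open-coded lines above (again, the name is invented for illustration):

	static u32 fb_location_from_range(u64 vram_start, u64 vram_end)
	{
		return ((u32)((vram_end >> 24) & 0xFFFF) << 16) |
		       (u32)((vram_start >> 24) & 0xFFFF);
	}
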
@@ -553,15 +562,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
        if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
                adev->mc.visible_vram_size = adev->mc.real_vram_size;
 
-       /* unless the user has overridden it, set the gart
-        * size equal to 1024 MB or vram, whichever is larger.
-        */
-       if (amdgpu_gart_size == -1)
-               adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-                                       adev->mc.mc_vram_size);
-       else
-               adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
-
+       amdgpu_gart_set_defaults(adev);
        gmc_v8_0_vram_gtt_location(adev, &adev->mc);
 
        return 0;
@@ -813,8 +814,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
        tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0);
        WREG32(mmVM_L2_CNTL4, tmp);
        /* setup context0 */
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
+       WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
        WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
        WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(adev->dummy_page.addr >> 12));
@@ -869,7 +870,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 
        gmc_v8_0_gart_flush_gpu_tlb(adev, 0);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(adev->mc.gtt_size >> 20),
+                (unsigned)(adev->mc.gart_size >> 20),
                 (unsigned long long)adev->gart.table_addr);
        adev->gart.ready = true;
        return 0;
@@ -1260,7 +1261,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle)
        if (!adev->mc.srbm_soft_reset)
                return 0;
 
-       gmc_v8_0_mc_stop(adev, &adev->mc.save);
+       gmc_v8_0_mc_stop(adev);
        if (gmc_v8_0_wait_for_idle(adev)) {
                dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
        }
@@ -1306,7 +1307,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
        if (!adev->mc.srbm_soft_reset)
                return 0;
 
-       gmc_v8_0_mc_resume(adev, &adev->mc.save);
+       gmc_v8_0_mc_resume(adev);
        return 0;
 }
 
index 175ba5f9691c4a5188442d012430e4212bbaee20..c22899a08106293afac45421b8f596bcc6c60390 100644 (file)
 #include <linux/firmware.h>
 #include "amdgpu.h"
 #include "gmc_v9_0.h"
+#include "amdgpu_atomfirmware.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 #include "vega10/HDP/hdp_4_0_sh_mask.h"
 #include "vega10/GC/gc_9_0_sh_mask.h"
+#include "vega10/DC/dce_12_0_offset.h"
+#include "vega10/DC/dce_12_0_sh_mask.h"
 #include "vega10/vega10_enum.h"
 
 #include "soc15_common.h"
@@ -419,8 +422,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
        if (!amdgpu_sriov_vf(adev))
                base = mmhub_v1_0_get_fb_location(adev);
        amdgpu_vram_location(adev, &adev->mc, base);
-       adev->mc.gtt_base_align = 0;
-       amdgpu_gtt_location(adev, mc);
+       amdgpu_gart_location(adev, mc);
        /* base offset of vram pages */
        if (adev->flags & AMD_IS_APU)
                adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
@@ -442,43 +444,46 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        u32 tmp;
        int chansize, numchan;
 
-       /* hbm memory channel size */
-       chansize = 128;
-
-       tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
-       tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
-       tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
-       switch (tmp) {
-       case 0:
-       default:
-               numchan = 1;
-               break;
-       case 1:
-               numchan = 2;
-               break;
-       case 2:
-               numchan = 0;
-               break;
-       case 3:
-               numchan = 4;
-               break;
-       case 4:
-               numchan = 0;
-               break;
-       case 5:
-               numchan = 8;
-               break;
-       case 6:
-               numchan = 0;
-               break;
-       case 7:
-               numchan = 16;
-               break;
-       case 8:
-               numchan = 2;
-               break;
+       adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
+       if (!adev->mc.vram_width) {
+               /* hbm memory channel size */
+               chansize = 128;
+
+               tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+               tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+               tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+               switch (tmp) {
+               case 0:
+               default:
+                       numchan = 1;
+                       break;
+               case 1:
+                       numchan = 2;
+                       break;
+               case 2:
+                       numchan = 0;
+                       break;
+               case 3:
+                       numchan = 4;
+                       break;
+               case 4:
+                       numchan = 0;
+                       break;
+               case 5:
+                       numchan = 8;
+                       break;
+               case 6:
+                       numchan = 0;
+                       break;
+               case 7:
+                       numchan = 16;
+                       break;
+               case 8:
+                       numchan = 2;
+                       break;
+               }
+               adev->mc.vram_width = numchan * chansize;
        }
-       adev->mc.vram_width = numchan * chansize;
 
        /* Could the aperture size report 0? */
        adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
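
The DF IntLvNumChan decode above is now only a fallback for when
atomfirmware does not report the VRAM width. The switch maps the field
to an HBM channel count, with zero marking reserved encodings; a
table-driven equivalent (hypothetical, for reference only):

	/* index is the IntLvNumChan field; 0 = reserved encoding */
	static const int hbm_numchan[] = { 1, 2, 0, 4, 0, 8, 0, 16, 2 };

	/* vram_width = numchan * 128, the HBM channel size in bits */
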
@@ -494,15 +499,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
                adev->mc.visible_vram_size = adev->mc.real_vram_size;
 
-       /* unless the user has overridden it, set the gart
-        * size equal to 1024 MB or vram, whichever is larger.
-        */
-       if (amdgpu_gart_size == -1)
-               adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-                                       adev->mc.mc_vram_size);
-       else
-               adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;
-
+       amdgpu_gart_set_defaults(adev);
        gmc_v9_0_vram_gtt_location(adev, &adev->mc);
 
        return 0;
@@ -537,10 +534,20 @@ static int gmc_v9_0_sw_init(void *handle)
 
        spin_lock_init(&adev->mc.invalidate_lock);
 
-       if (adev->flags & AMD_IS_APU) {
+       switch (adev->asic_type) {
+       case CHIP_RAVEN:
                adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-               amdgpu_vm_adjust_size(adev, 64);
-       } else {
+               if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
+                       adev->vm_manager.vm_size = 1U << 18;
+                       adev->vm_manager.block_size = 9;
+                       adev->vm_manager.num_level = 3;
+               } else {
+                       /* vm_size is 64 GB for legacy 2-level page support */
+                       amdgpu_vm_adjust_size(adev, 64);
+                       adev->vm_manager.num_level = 1;
+               }
+               break;
+       case CHIP_VEGA10:
                /* XXX Don't know how to get VRAM type yet. */
                adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
                /*
@@ -550,11 +557,16 @@ static int gmc_v9_0_sw_init(void *handle)
                 */
                adev->vm_manager.vm_size = 1U << 18;
                adev->vm_manager.block_size = 9;
-               DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
-                               adev->vm_manager.vm_size,
-                               adev->vm_manager.block_size);
+               adev->vm_manager.num_level = 3;
+               break;
+       default:
+               break;
        }
 
+       DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
+                       adev->vm_manager.vm_size,
+                       adev->vm_manager.block_size);
+
        /* This interrupt is VMC page fault. */
        r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
                                &adev->mc.vm_fault);
@@ -619,11 +631,6 @@ static int gmc_v9_0_sw_init(void *handle)
        adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
        adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
 
-       /* TODO: fix num_level for APU when updating vm size and block size */
-       if (adev->flags & AMD_IS_APU)
-               adev->vm_manager.num_level = 1;
-       else
-               adev->vm_manager.num_level = 3;
        amdgpu_vm_manager_init(adev);
 
        return 0;
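
The per-ASIC VM geometry that replaces the old APU/dGPU split,
collected from the hunks above. Note that vm_size is tracked in GB, so
1U << 18 GB is 2^48 bytes, i.e. a 48-bit virtual address space:

	/*
	 * ASIC                vm_size       block_size         num_level
	 * RAVEN rev 0x0/0x1   1U << 18 GB   9                  3
	 * RAVEN other revs    64 GB         via adjust_size()  1
	 * VEGA10              1U << 18 GB   9                  3
	 */
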
@@ -731,7 +738,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
        gmc_v9_0_gart_flush_gpu_tlb(adev, 0);
 
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-                (unsigned)(adev->mc.gtt_size >> 20),
+                (unsigned)(adev->mc.gart_size >> 20),