Merge tag 'v5.3-rc3' into drm-next-5.4
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5c05644b9b96a802d6c2699b4af631e85ea0e7a2..ab92b24ac4ff43503decdac2e7ab9a9ff028c706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -391,6 +391,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                        src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
                                                             src->mem);
                        src_node_size = (src_mm->size << PAGE_SHIFT);
+                       src_page_offset = 0;
                } else {
                        src_node_start += cur_size;
                        src_page_offset = src_node_start & (PAGE_SIZE - 1);
@@ -400,6 +401,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                        dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
                                                             dst->mem);
                        dst_node_size = (dst_mm->size << PAGE_SHIFT);
+                       dst_page_offset = 0;
                } else {
                        dst_node_start += cur_size;
                        dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
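Note: the two hunks above fix the split-copy loop in amdgpu_ttm_copy_mem_to_mem(). When the loop advances to the next drm_mm node, that node starts page aligned, so the per-page offset carried over from the previous node must be reset to 0 for both source and destination. A small user-space sketch of that walk (hypothetical names, a fixed two-page window standing in for the GTT limit, not the driver code itself) shows where the reset belongs:

```c
/* Illustrative model of the node walk; compile with: gcc -std=c99 nodewalk.c */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define WINDOW     (2 * PAGE_SIZE)   /* stand-in for the GTT window limit */

struct node { uint64_t start; uint64_t pages; };

static uint64_t min3(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	/* two VRAM nodes of 2 and 4 pages; the copy starts 0x800 into node 0 */
	struct node nodes[] = { { 0x100000, 2 }, { 0x300000, 4 } };
	unsigned int idx = 0;
	uint64_t node_start  = nodes[0].start + 0x800;
	uint64_t node_size   = (nodes[0].pages << PAGE_SHIFT) - 0x800;
	uint64_t page_offset = 0x800;
	uint64_t remaining   = 5 * PAGE_SIZE;

	while (remaining) {
		uint64_t cur = min3(node_size, remaining, WINDOW - page_offset);

		printf("copy 0x%llx bytes at 0x%llx (page offset 0x%llx)\n",
		       (unsigned long long)cur,
		       (unsigned long long)node_start,
		       (unsigned long long)page_offset);

		remaining -= cur;
		node_size -= cur;
		if (!node_size && remaining) {
			/* the next node starts page aligned, so the offset
			 * must be reset here: this is the line the hunks
			 * above add for both src and dst */
			node_start  = nodes[++idx].start;
			node_size   = nodes[idx].pages << PAGE_SHIFT;
			page_offset = 0;
		} else {
			node_start += cur;
			page_offset = node_start & (PAGE_SIZE - 1);
		}
	}
	return 0;
}
```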
@@ -442,6 +444,22 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
        if (r)
                goto error;
 
+       /* clear the space being freed */
+       if (old_mem->mem_type == TTM_PL_VRAM &&
+           (ttm_to_amdgpu_bo(bo)->flags &
+            AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+               struct dma_fence *wipe_fence = NULL;
+
+               r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
+                                      NULL, &wipe_fence);
+               if (r) {
+                       goto error;
+               } else if (wipe_fence) {
+                       dma_fence_put(fence);
+                       fence = wipe_fence;
+               }
+       }
+
        /* Always block for VM page tables before committing the new location */
        if (bo->type == ttm_bo_type_kernel)
                r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
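Note: the hunk above clears VRAM that a flagged BO is vacating. After the blit, amdgpu_fill_buffer() overwrites the old location with AMDGPU_POISON when the BO was created with AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE, and if that fill produced a fence it replaces the copy fence, so the accel cleanup below waits for the wipe as well. A minimal user-space model of that reference hand-off (toy refcounting and hypothetical names, not the kernel dma_fence API):

```c
#include <stdio.h>
#include <stdlib.h>

struct toy_fence { int refcount; const char *what; };

static struct toy_fence *fence_create(const char *what)
{
	struct toy_fence *f = malloc(sizeof(*f));
	f->refcount = 1;
	f->what = what;
	return f;
}

static void fence_put(struct toy_fence *f)
{
	if (f && --f->refcount == 0) {
		printf("released fence for '%s'\n", f->what);
		free(f);
	}
}

int main(void)
{
	struct toy_fence *fence = fence_create("copy");       /* from the blit */
	struct toy_fence *wipe_fence = fence_create("wipe");  /* from the fill */

	if (wipe_fence) {
		/* same shape as the hunk above: the wipe supersedes the copy */
		fence_put(fence);
		fence = wipe_fence;
	}

	printf("cleanup now waits on fence for '%s'\n", fence->what);
	fence_put(fence);
	return 0;
}
```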
@@ -487,6 +505,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
        if (unlikely(r)) {
+               pr_err("Failed to find GTT space for blit from VRAM\n");
                return r;
        }
 
@@ -545,6 +564,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
        placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
        r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
        if (unlikely(r)) {
+               pr_err("Failed to find GTT space for blit to VRAM\n");
                return r;
        }
 
@@ -564,6 +584,30 @@ out_cleanup:
        return r;
 }
 
+/**
+ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_mem_visible(struct amdgpu_device *adev,
+                              struct ttm_mem_reg *mem)
+{
+       struct drm_mm_node *nodes = mem->mm_node;
+
+       if (mem->mem_type == TTM_PL_SYSTEM ||
+           mem->mem_type == TTM_PL_TT)
+               return true;
+       if (mem->mem_type != TTM_PL_VRAM)
+               return false;
+
+       /* ttm_mem_reg_ioremap only supports contiguous memory */
+       if (nodes->size != mem->num_pages)
+               return false;
+
+       return ((nodes->start + nodes->size) << PAGE_SHIFT)
+               <= adev->gmc.visible_vram_size;
+}
+
 /**
  * amdgpu_bo_move - Move a buffer object to a new memory location
  *
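Note: the new amdgpu_mem_visible() helper above decides whether ttm_bo_move_memcpy() can touch a placement at all. System and GTT memory are always CPU accessible, while VRAM qualifies only when it is a single contiguous node (ttm_mem_reg_ioremap() cannot map scattered nodes) that ends inside the BAR-visible window. A compilable user-space model with made-up values illustrates the check; the simplified types and the 256 MiB aperture are illustrative only:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

enum place { PL_SYSTEM, PL_TT, PL_VRAM };

struct mm_node { uint64_t start_page; uint64_t size_pages; };

struct mem_reg {
	enum place type;
	uint64_t num_pages;
	struct mm_node *node;   /* first (and only, if contiguous) node */
};

static bool mem_visible(const struct mem_reg *mem, uint64_t visible_vram_size)
{
	if (mem->type == PL_SYSTEM || mem->type == PL_TT)
		return true;
	if (mem->type != PL_VRAM)
		return false;

	/* an ioremap of the region only works if one node covers it all */
	if (mem->node->size_pages != mem->num_pages)
		return false;

	return ((mem->node->start_page + mem->node->size_pages) << PAGE_SHIFT)
		<= visible_vram_size;
}

int main(void)
{
	struct mm_node lo = { .start_page = 0,       .size_pages = 64 };
	struct mm_node hi = { .start_page = 1 << 18, .size_pages = 64 };
	struct mem_reg in_bar  = { PL_VRAM, 64, &lo };
	struct mem_reg out_bar = { PL_VRAM, 64, &hi };
	uint64_t visible = 256ULL << 20;   /* 256 MiB visible aperture */

	printf("low VRAM visible:  %d\n", mem_visible(&in_bar, visible));
	printf("high VRAM visible: %d\n", mem_visible(&out_bar, visible));
	return 0;
}
```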
@@ -608,8 +652,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
                return 0;
        }
 
-       if (!adev->mman.buffer_funcs_enabled)
+       if (!adev->mman.buffer_funcs_enabled) {
+               r = -ENODEV;
                goto memcpy;
+       }
 
        if (old_mem->mem_type == TTM_PL_VRAM &&
            new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -624,10 +670,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
        if (r) {
 memcpy:
-               r = ttm_bo_move_memcpy(bo, ctx, new_mem);
-               if (r) {
+               /* Check that all memory is CPU accessible */
+               if (!amdgpu_mem_visible(adev, old_mem) ||
+                   !amdgpu_mem_visible(adev, new_mem)) {
+                       pr_err("Move buffer fallback to memcpy unavailable\n");
                        return r;
                }
+
+               r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+               if (r)
+                       return r;
        }
 
        if (bo->type == ttm_bo_type_device &&
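Note: together, the last two hunks rework the fallback path in amdgpu_bo_move(). When the SDMA buffer functions are disabled, r is set to -ENODEV before jumping to the memcpy label, and the CPU memcpy is only attempted when both the old and the new placement pass amdgpu_mem_visible(); otherwise the original error is returned instead of faulting on unreachable VRAM. A rough user-space model of that decision flow (stub helpers and hypothetical names, not the TTM API):

```c
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int try_blit(bool blit_available)
{
	return blit_available ? 0 : -ENODEV;
}

static int move_buffer(bool blit_available, bool old_visible, bool new_visible)
{
	int r = try_blit(blit_available);

	if (r) {
		/* the CPU fallback only works when both ends are reachable */
		if (!old_visible || !new_visible) {
			fprintf(stderr, "memcpy fallback unavailable\n");
			return r;            /* propagate the original error */
		}
		printf("falling back to CPU memcpy\n");
		r = 0;                       /* pretend the memcpy succeeded */
	}
	return r;
}

int main(void)
{
	printf("blit ok:         %d\n", move_buffer(true,  true,  true));
	printf("memcpy fallback: %d\n", move_buffer(false, true,  true));
	printf("no path at all:  %d\n", move_buffer(false, false, true));
	return 0;
}
```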
@@ -1563,6 +1615,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
        .move = &amdgpu_bo_move,
        .verify_access = &amdgpu_verify_access,
        .move_notify = &amdgpu_bo_move_notify,
+       .release_notify = &amdgpu_bo_release_notify,
        .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
        .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
        .io_mem_free = &amdgpu_ttm_io_mem_free,
@@ -2059,9 +2112,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
        mm_node = bo->tbo.mem.mm_node;
        num_loops = 0;
        while (num_pages) {
-               uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+               uint64_t byte_count = mm_node->size << PAGE_SHIFT;
 
-               num_loops += DIV_ROUND_UP(byte_count, max_bytes);
+               num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
                num_pages -= mm_node->size;
                ++mm_node;
        }
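Note: the hunk above widens byte_count in the loop that counts how many fill packets the job needs. For a contiguous node of 4 GiB or more, mm_node->size << PAGE_SHIFT no longer fits in 32 bits, so storing it in a u32 truncated it and num_loops came out too small; DIV_ROUND_UP_ULL keeps the division 64-bit safe on 32-bit kernels as well. A plain C demonstration of the truncation, using an arbitrary per-packet limit rather than any real hardware value:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

int main(void)
{
	uint64_t node_pages = 1ULL << 20;   /* 4 GiB worth of pages */
	uint64_t max_bytes  = 0x3fff00;     /* arbitrary per-packet limit */

	uint32_t narrow = (uint32_t)(node_pages << PAGE_SHIFT); /* wraps to 0 */
	uint64_t wide   = node_pages << PAGE_SHIFT;

	printf("32-bit byte_count: %" PRIu32 " -> loops %" PRIu64 "\n",
	       narrow, div_round_up(narrow, max_bytes));
	printf("64-bit byte_count: %" PRIu64 " -> loops %" PRIu64 "\n",
	       wide, div_round_up(wide, max_bytes));
	return 0;
}
```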
@@ -2087,12 +2140,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
        mm_node = bo->tbo.mem.mm_node;
 
        while (num_pages) {
-               uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+               uint64_t byte_count = mm_node->size << PAGE_SHIFT;
                uint64_t dst_addr;
 
                dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
                while (byte_count) {
-                       uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+                       uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
+                                                          max_bytes);
 
                        amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
                                                dst_addr, cur_size_in_bytes);
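Note: the final hunk applies the same widening inside the emission loop. Since max_bytes itself stays 32-bit, the per-packet size is clamped with min_t(uint64_t, ...), so the comparison happens in 64 bits before the result is narrowed back into the u32 cur_size_in_bytes. A sketch of that inner clamp, with a hypothetical emit function and an arbitrary packet limit:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void emit_fill(uint64_t dst, uint32_t bytes)
{
	printf("fill 0x%" PRIx32 " bytes at 0x%" PRIx64 "\n", bytes, dst);
}

int main(void)
{
	uint64_t byte_count = 5ULL << 30;     /* a 5 GiB node */
	uint64_t dst_addr   = 0x100000000ULL;
	uint32_t max_bytes  = 0x3fff00;       /* arbitrary per-packet limit */
	uint64_t packets = 0;

	while (byte_count) {
		/* 64-bit comparison first, then a safe narrowing: the same
		 * effect as min_t(uint64_t, byte_count, max_bytes) */
		uint32_t cur = byte_count < max_bytes ? (uint32_t)byte_count
						      : max_bytes;

		if (packets < 3)              /* avoid printing every packet */
			emit_fill(dst_addr, cur);

		dst_addr   += cur;
		byte_count -= cur;
		packets++;
	}
	printf("... %" PRIu64 " packets total\n", packets);
	return 0;
}
```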