Merge tag 'amd-drm-next-5.18-2022-02-11-1' of https://gitlab.freedesktop.org/agd5f...
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d178fbec70489523711d0c9f940ed7483d747df6..414a22dddc78d03d66729cf08ed2cb4ae5baad5c 100644 (file)
@@ -50,6 +50,7 @@
 #include <drm/ttm/ttm_range_manager.h>
 
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 
 #include "amdgpu.h"
 #include "amdgpu_object.h"
@@ -170,10 +171,10 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
  * @bo: buffer object to map
  * @mem: memory object to map
  * @mm_cur: range to map
- * @num_pages: number of pages to map
  * @window: which GART window to use
  * @ring: DMA ring to use for the copy
  * @tmz: if we should setup a TMZ enabled mapping
+ * @size: in: number of bytes to map; out: number of bytes actually mapped
  * @addr: resulting address inside the MC address space
  *
  * Setup one of the GART windows to access a specific piece of memory or return
@@ -182,15 +183,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
                                 struct ttm_resource *mem,
                                 struct amdgpu_res_cursor *mm_cur,
-                                unsigned num_pages, unsigned window,
-                                struct amdgpu_ring *ring, bool tmz,
-                                uint64_t *addr)
+                                unsigned window, struct amdgpu_ring *ring,
+                                bool tmz, uint64_t *size, uint64_t *addr)
 {
        struct amdgpu_device *adev = ring->adev;
-       struct amdgpu_job *job;
-       unsigned num_dw, num_bytes;
-       struct dma_fence *fence;
+       unsigned offset, num_pages, num_dw, num_bytes;
        uint64_t src_addr, dst_addr;
+       struct dma_fence *fence;
+       struct amdgpu_job *job;
        void *cpu_addr;
        uint64_t flags;
        unsigned int i;
@@ -198,7 +198,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 
        BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
               AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
-       BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
+
+       if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
+               return -EINVAL;
 
        /* Map only what can't be accessed directly */
        if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
@@ -207,10 +209,22 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
                return 0;
        }
 
+
+       /*
+        * If start begins at an offset inside the page, then adjust the size
+        * and addr accordingly
+        */
+       offset = mm_cur->start & ~PAGE_MASK;
+
+       num_pages = PFN_UP(*size + offset);
+       num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+
+       *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
+
        *addr = adev->gmc.gart_start;
        *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
                AMDGPU_GPU_PAGE_SIZE;
-       *addr += mm_cur->start & ~PAGE_MASK;
+       *addr += offset;
 
        num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
        num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
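
For reviewers: the clamping above is easier to see in isolation. Here is a
minimal userspace sketch of the same arithmetic; PAGE_* and PFN_UP are
redefined locally, and WINDOW_PAGES is an illustrative stand-in for
AMDGPU_GTT_MAX_TRANSFER_SIZE, not the real value:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define WINDOW_PAGES	512		/* illustrative window size in pages */

int main(void)
{
	uint64_t start = 0x10000a00;	/* resource offset, not page aligned */
	uint64_t size = 4ULL << 20;	/* caller asks for 4 MiB */

	/* start lies inside a page, so the window must cover the offset too */
	unsigned int offset = start & ~PAGE_MASK;
	unsigned int num_pages = PFN_UP(size + offset);

	if (num_pages > WINDOW_PAGES)
		num_pages = WINDOW_PAGES;

	/* shrink the request to what actually fits behind the offset */
	if (size > (uint64_t)num_pages * PAGE_SIZE - offset)
		size = (uint64_t)num_pages * PAGE_SIZE - offset;

	printf("offset=%#x num_pages=%u mapped=%llu bytes\n",
	       offset, num_pages, (unsigned long long)size);
	return 0;
}

*size only ever shrinks here, which is why the callers below can simply loop
until their cursor is drained.
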
@@ -241,10 +255,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
                dma_addr_t *dma_addr;
 
                dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
-               r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
-                                   cpu_addr);
-               if (r)
-                       goto error_free;
+               amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
        } else {
                dma_addr_t dma_address;
 
@@ -252,11 +263,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
                dma_address += adev->vm_manager.vram_base_offset;
 
                for (i = 0; i < num_pages; ++i) {
-                       r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
-                                           &dma_address, flags, cpu_addr);
-                       if (r)
-                               goto error_free;
-
+                       amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address,
+                                       flags, cpu_addr);
                        dma_address += PAGE_SIZE;
                }
        }
@@ -297,9 +305,6 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                               struct dma_resv *resv,
                               struct dma_fence **f)
 {
-       const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
-                                       AMDGPU_GPU_PAGE_SIZE);
-
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        struct amdgpu_res_cursor src_mm, dst_mm;
        struct dma_fence *fence = NULL;
@@ -315,29 +320,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 
        mutex_lock(&adev->mman.gtt_window_lock);
        while (src_mm.remaining) {
-               uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
-               uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
+               uint64_t from, to, cur_size;
                struct dma_fence *next;
-               uint32_t cur_size;
-               uint64_t from, to;
 
-               /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
-                * begins at an offset, then adjust the size accordingly
-                */
-               cur_size = max(src_page_offset, dst_page_offset);
-               cur_size = min(min3(src_mm.size, dst_mm.size, size),
-                              (uint64_t)(GTT_MAX_BYTES - cur_size));
+               /* Never copy more than 256MiB at once to avoid a timeout */
+               cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
 
                /* Map src to window 0 and dst to window 1. */
                r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
-                                         PFN_UP(cur_size + src_page_offset),
-                                         0, ring, tmz, &from);
+                                         0, ring, tmz, &cur_size, &from);
                if (r)
                        goto error;
 
                r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
-                                         PFN_UP(cur_size + dst_page_offset),
-                                         1, ring, tmz, &to);
+                                         1, ring, tmz, &cur_size, &to);
                if (r)
                        goto error;
 
@@ -396,8 +392,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
            (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
                struct dma_fence *wipe_fence = NULL;
 
-               r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
-                                      NULL, &wipe_fence);
+               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
                if (r) {
                        goto error;
                } else if (wipe_fence) {
@@ -821,14 +816,13 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
 #endif
 }
 
-static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
-                               struct ttm_buffer_object *tbo,
-                               uint64_t flags)
+static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+                                struct ttm_buffer_object *tbo,
+                                uint64_t flags)
 {
        struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
        struct ttm_tt *ttm = tbo->ttm;
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
-       int r;
 
        if (amdgpu_bo_encrypted(abo))
                flags |= AMDGPU_PTE_TMZ;
@@ -836,10 +830,8 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
        if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
                uint64_t page_idx = 1;
 
-               r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
-                               gtt->ttm.dma_address, flags);
-               if (r)
-                       goto gart_bind_fail;
+               amdgpu_gart_bind(adev, gtt->offset, page_idx,
+                                gtt->ttm.dma_address, flags);
 
                /* The memory type of the first page defaults to UC. Now
                 * modify the memory type to NC from the second page of
@@ -848,21 +840,13 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
                flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
                flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
-               r = amdgpu_gart_bind(adev,
-                               gtt->offset + (page_idx << PAGE_SHIFT),
-                               ttm->num_pages - page_idx,
-                               &(gtt->ttm.dma_address[page_idx]), flags);
+               amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
+                                ttm->num_pages - page_idx,
+                                &(gtt->ttm.dma_address[page_idx]), flags);
        } else {
-               r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
-                                    gtt->ttm.dma_address, flags);
+               amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+                                gtt->ttm.dma_address, flags);
        }
-
-gart_bind_fail:
-       if (r)
-               DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
-                         ttm->num_pages, gtt->offset);
-
-       return r;
 }
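
The GFX9 MQD case above is subtle enough to restate: only the first page keeps
the default (UC) memory type, everything from the second page on is rebound as
NC. A compilable toy version of that split; gart_bind() here is a hypothetical
stand-in that just logs instead of programming PTEs:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* hypothetical stand-in for amdgpu_gart_bind() */
static void gart_bind(uint64_t gtt_offset, unsigned int num_pages,
		      const uint64_t *dma_addr, const char *mtype)
{
	(void)dma_addr;		/* a real bind would program these PTEs */
	printf("bind %u page(s) at GTT %#llx as %s\n",
	       num_pages, (unsigned long long)gtt_offset, mtype);
}

int main(void)
{
	uint64_t dma_addr[4] = { 0 };	/* pretend DMA addresses */
	uint64_t gtt_offset = 0x100000;
	unsigned int num_pages = 4;
	unsigned int page_idx = 1;

	/* page 0 keeps the default UC type for the MQD header */
	gart_bind(gtt_offset, page_idx, dma_addr, "UC");
	/* pages 1..n-1 are rebound as NC, mirroring the flags rewrite above */
	gart_bind(gtt_offset + ((uint64_t)page_idx << PAGE_SHIFT),
		  num_pages - page_idx, &dma_addr[page_idx], "NC");
	return 0;
}
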
 
 /*
@@ -878,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
        struct amdgpu_ttm_tt *gtt = (void*)ttm;
        uint64_t flags;
-       int r = 0;
+       int r;
 
        if (!bo_mem)
                return -EINVAL;
@@ -925,14 +909,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
 
        /* bind pages into GART page tables */
        gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-       r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
-               gtt->ttm.dma_address, flags);
-
-       if (r)
-               DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
-                         ttm->num_pages, gtt->offset);
+       amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+                        gtt->ttm.dma_address, flags);
        gtt->bound = true;
-       return r;
+       return 0;
 }
 
 /*
@@ -982,12 +962,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 
        /* Bind pages */
        gtt->offset = (u64)tmp->start << PAGE_SHIFT;
-       r = amdgpu_ttm_gart_bind(adev, bo, flags);
-       if (unlikely(r)) {
-               ttm_resource_free(bo, &tmp);
-               return r;
-       }
-
+       amdgpu_ttm_gart_bind(adev, bo, flags);
        amdgpu_gart_invalidate_tlb(adev);
        ttm_resource_free(bo, &bo->resource);
        ttm_bo_assign_mem(bo, tmp);
@@ -1001,19 +976,16 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
  * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
  * rebind GTT pages during a GPU reset.
  */
-int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
        uint64_t flags;
-       int r;
 
        if (!tbo->ttm)
-               return 0;
+               return;
 
        flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
-       r = amdgpu_ttm_gart_bind(adev, tbo, flags);
-
-       return r;
+       amdgpu_ttm_gart_bind(adev, tbo, flags);
 }
 
 /*
@@ -1027,7 +999,6 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
-       int r;
 
        /* if the pages have userptr pinning then clear that first */
        if (gtt->userptr) {
@@ -1047,10 +1018,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
                return;
 
        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-       r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
-       if (r)
-               DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
-                         gtt->ttm.num_pages, gtt->offset);
+       amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
        gtt->bound = false;
 }
 
@@ -1168,6 +1136,26 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
        return ttm_pool_free(&adev->mman.bdev.pool, ttm);
 }
 
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userspace address backing a
+ * userptr ttm_tt
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: The returned userspace address
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+                             uint64_t *user_addr)
+{
+       struct amdgpu_ttm_tt *gtt;
+
+       if (!tbo->ttm)
+               return -EINVAL;
+
+       gtt = (void *)tbo->ttm;
+       *user_addr = gtt->userptr;
+       return 0;
+}
+
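
A quick usage sketch for the new accessor, in case it helps review. This is a
hypothetical kernel-context caller, not part of the patch, and won't build
standalone:

/* hypothetical caller: recover the userspace VA behind a userptr BO */
static int example_get_user_va(struct amdgpu_bo *bo, uint64_t *va)
{
	/* returns -EINVAL when the BO has no ttm_tt attached yet */
	return amdgpu_ttm_tt_get_userptr(&bo->tbo, va);
}
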
 /**
  * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
  * task
@@ -1433,6 +1421,63 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
        }
 }
 
+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+                                       unsigned long offset, void *buf, int len, int write)
+{
+       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+       struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+       struct amdgpu_res_cursor src_mm;
+       struct amdgpu_job *job;
+       struct dma_fence *fence;
+       uint64_t src_addr, dst_addr;
+       unsigned int num_dw;
+       int r, idx;
+
+       if (len != PAGE_SIZE)
+               return -EINVAL;
+
+       if (!adev->mman.sdma_access_ptr)
+               return -EACCES;
+
+       if (!drm_dev_enter(adev_to_drm(adev), &idx))
+               return -ENODEV;
+
+       if (write)
+               memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+       num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
+       if (r)
+               goto out;
+
+       amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
+       src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start;
+       dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+       if (write)
+               swap(src_addr, dst_addr);
+
+       amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
+
+       amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+       WARN_ON(job->ibs[0].length_dw > num_dw);
+
+       r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+       if (r) {
+               amdgpu_job_free(job);
+               goto out;
+       }
+
+       if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+               r = -ETIMEDOUT;
+       dma_fence_put(fence);
+
+       if (!(r || write))
+               memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+       drm_dev_exit(idx);
+       return r;
+}
+
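
The flow here is a classic bounce buffer: writes stage the caller's data into
sdma_access_ptr first and then swap src/dst so one engine copy covers both
directions; reads copy out of the bounce page only after the fence signals.
A userspace model of just that direction trick, with memcpy standing in for
the SDMA engine:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BOUNCE_SIZE 4096

/* stand-in for the SDMA copy; direction is chosen by the caller */
static void dma_copy(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
}

static void access_vram(uint8_t *vram, uint8_t *bounce, uint8_t *buf,
			size_t len, int write)
{
	uint8_t *src = vram, *dst = bounce;

	if (write) {
		memcpy(bounce, buf, len);	/* stage user data first */
		src = bounce;			/* the swap(src, dst) step */
		dst = vram;
	}
	dma_copy(dst, src, len);		/* one copy, either direction */
	if (!write)
		memcpy(buf, bounce, len);	/* hand the result back */
}

int main(void)
{
	static uint8_t vram[BOUNCE_SIZE], bounce[BOUNCE_SIZE], buf[BOUNCE_SIZE];

	buf[0] = 0xab;
	access_vram(vram, bounce, buf, sizeof(buf), 1);	/* write path */
	buf[0] = 0;
	access_vram(vram, bounce, buf, sizeof(buf), 0);	/* read it back */
	printf("read back %#x\n", buf[0]);
	return 0;
}
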
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1457,6 +1502,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
        if (bo->resource->mem_type != TTM_PL_VRAM)
                return -EIO;
 
+       if (amdgpu_device_has_timeouts_enabled(adev) &&
+                       !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+               return len;
+
        amdgpu_res_first(bo->resource, offset, len, &cursor);
        while (cursor.remaining) {
                size_t count, size = cursor.size;
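
One readability note on the call above: amdgpu_ttm_access_memory_sdma()
returns 0 on success, so the negation reads as "SDMA handled it, report the
full length"; any error (including -EACCES when the bounce page was never
allocated) falls through to the MMIO loop below. The shape of the pattern,
with hypothetical names:

	/* fast path with fallback; helper returns 0 when it handled it all */
	if (fast_path_usable(adev) &&
	    !access_vram_via_sdma(bo, offset, buf, len, write))
		return len;
	/* otherwise fall through to the slow MMIO path */
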
@@ -1797,6 +1846,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                return r;
        }
 
+       if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+                               AMDGPU_GEM_DOMAIN_GTT,
+                               &adev->mman.sdma_access_bo, NULL,
+                               &adev->mman.sdma_access_ptr))
+               DRM_WARN("Debug VRAM access will use slowpath MM access\n");
+
        return 0;
 }
 
@@ -1818,6 +1873,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
        if (adev->mman.stolen_reserved_size)
                amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
                                      NULL, NULL);
+       amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+                                       &adev->mman.sdma_access_ptr);
        amdgpu_ttm_fw_reserve_vram_fini(adev);
 
        if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -1888,23 +1945,55 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
        adev->mman.buffer_funcs_enabled = enable;
 }
 
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+                                 bool direct_submit,
+                                 unsigned int num_dw,
+                                 struct dma_resv *resv,
+                                 bool vm_needs_flush,
+                                 struct amdgpu_job **job)
+{
+       enum amdgpu_ib_pool_type pool = direct_submit ?
+               AMDGPU_IB_POOL_DIRECT :
+               AMDGPU_IB_POOL_DELAYED;
+       int r;
+
+       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job);
+       if (r)
+               return r;
+
+       if (vm_needs_flush) {
+               (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+                                                       adev->gmc.pdb0_bo :
+                                                       adev->gart.bo);
+               (*job)->vm_needs_flush = true;
+       }
+       if (resv) {
+               r = amdgpu_sync_resv(adev, &(*job)->sync, resv,
+                                    AMDGPU_SYNC_ALWAYS,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED);
+               if (r) {
+                       DRM_ERROR("sync failed (%d).\n", r);
+                       amdgpu_job_free(*job);
+                       return r;
+               }
+       }
+       return 0;
+}
+
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
                       uint64_t dst_offset, uint32_t byte_count,
                       struct dma_resv *resv,
                       struct dma_fence **fence, bool direct_submit,
                       bool vm_needs_flush, bool tmz)
 {
-       enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
-               AMDGPU_IB_POOL_DELAYED;
        struct amdgpu_device *adev = ring->adev;
+       unsigned num_loops, num_dw;
        struct amdgpu_job *job;
-
        uint32_t max_bytes;
-       unsigned num_loops, num_dw;
        unsigned i;
        int r;
 
-       if (direct_submit && !ring->sched.ready) {
+       if (!direct_submit && !ring->sched.ready) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }
@@ -1912,26 +2001,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
        num_loops = DIV_ROUND_UP(byte_count, max_bytes);
        num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
-
-       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
+       r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+                                  resv, vm_needs_flush, &job);
        if (r)
                return r;
 
-       if (vm_needs_flush) {
-               job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
-                                       adev->gmc.pdb0_bo : adev->gart.bo);
-               job->vm_needs_flush = true;
-       }
-       if (resv) {
-               r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_SYNC_ALWAYS,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
-               if (r) {
-                       DRM_ERROR("sync failed (%d).\n", r);
-                       goto error_free;
-               }
-       }
-
        for (i = 0; i < num_loops; i++) {
                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
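
For a feel of the IB sizing that both submission paths now share, here is the
arithmetic with made-up limits (the real ones come from
adev->mman.buffer_funcs):

#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	uint32_t max_bytes = 0x3fffe0;	/* hypothetical copy_max_bytes */
	uint32_t copy_num_dw = 7;	/* hypothetical DWs per copy packet */
	uint32_t byte_count = 64U << 20;	/* 64 MiB request */

	uint32_t num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	uint32_t num_dw = ALIGN(num_loops * copy_num_dw, 8);

	/* num_dw * 4 is what gets handed to amdgpu_ttm_prepare_job() */
	printf("loops=%u dw=%u ib_bytes=%u\n", num_loops, num_dw, num_dw * 4);
	return 0;
}
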
@@ -1961,77 +2035,35 @@ error_free:
        return r;
 }
 
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-                      uint32_t src_data,
-                      struct dma_resv *resv,
-                      struct dma_fence **fence)
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+                              uint64_t dst_addr, uint32_t byte_count,
+                              struct dma_resv *resv,
+                              struct dma_fence **fence,
+                              bool vm_needs_flush)
 {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
-       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-       struct amdgpu_res_cursor cursor;
+       struct amdgpu_device *adev = ring->adev;
        unsigned int num_loops, num_dw;
-       uint64_t num_bytes;
-
        struct amdgpu_job *job;
+       uint32_t max_bytes;
+       unsigned int i;
        int r;
 
-       if (!adev->mman.buffer_funcs_enabled) {
-               DRM_ERROR("Trying to clear memory with ring turned off.\n");
-               return -EINVAL;
-       }
-
-       if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
-               DRM_ERROR("Trying to clear preemptible memory.\n");
-               return -EINVAL;
-       }
-
-       if (bo->tbo.resource->mem_type == TTM_PL_TT) {
-               r = amdgpu_ttm_alloc_gart(&bo->tbo);
-               if (r)
-                       return r;
-       }
-
-       num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
-       num_loops = 0;
-
-       amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
-       while (cursor.remaining) {
-               num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
-               amdgpu_res_next(&cursor, cursor.size);
-       }
-       num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
-
-       /* for IB padding */
-       num_dw += 64;
-
-       r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
-                                    &job);
+       max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+       num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
+       num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
+       r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+                                  &job);
        if (r)
                return r;
 
-       if (resv) {
-               r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_SYNC_ALWAYS,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED);
-               if (r) {
-                       DRM_ERROR("sync failed (%d).\n", r);
-                       goto error_free;
-               }
-       }
-
-       amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
-       while (cursor.remaining) {
-               uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
-               uint64_t dst_addr = cursor.start;
+       for (i = 0; i < num_loops; i++) {
+               uint32_t cur_size = min(byte_count, max_bytes);
 
-               dst_addr += amdgpu_ttm_domain_start(adev,
-                                                   bo->tbo.resource->mem_type);
                amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
                                        cur_size);
 
-               amdgpu_res_next(&cursor, cur_size);
+               dst_addr += cur_size;
+               byte_count -= cur_size;
        }
 
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
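
And the fill loop itself is plain chunking: each pass emits at most
fill_max_bytes and walks the destination forward. A standalone version with a
made-up limit:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP_ULL(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	uint32_t max_bytes = 0x3fffe0;	/* hypothetical fill_max_bytes */
	uint32_t byte_count = 10U << 20;	/* 10 MiB to fill */
	uint64_t dst_addr = 0x100000;
	uint64_t i, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size = byte_count < max_bytes ?
				    byte_count : max_bytes;

		printf("fill %#x bytes at %#llx\n", cur_size,
		       (unsigned long long)dst_addr);
		dst_addr += cur_size;	/* walk the destination forward */
		byte_count -= cur_size;	/* shrink what is left */
	}
	return 0;
}
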
@@ -2048,6 +2080,55 @@ error_free:
        return r;
 }
 
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+                       uint32_t src_data,
+                       struct dma_resv *resv,
+                       struct dma_fence **f)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct dma_fence *fence = NULL;
+       struct amdgpu_res_cursor dst;
+       int r;
+
+       if (!adev->mman.buffer_funcs_enabled) {
+               DRM_ERROR("Trying to clear memory with ring turned off.\n");
+               return -EINVAL;
+       }
+
+       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
+
+       mutex_lock(&adev->mman.gtt_window_lock);
+       while (dst.remaining) {
+               struct dma_fence *next;
+               uint64_t cur_size, to;
+
+               /* Never fill more than 256MiB at once to avoid timeouts */
+               cur_size = min(dst.size, 256ULL << 20);
+
+               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+                                         1, ring, false, &cur_size, &to);
+               if (r)
+                       goto error;
+
+               r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
+                                       &next, true);
+               if (r)
+                       goto error;
+
+               dma_fence_put(fence);
+               fence = next;
+
+               amdgpu_res_next(&dst, cur_size);
+       }
+error:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+       if (f)
+               *f = dma_fence_get(fence);
+       dma_fence_put(fence);
+       return r;
+}
+
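
Last note, on the loop above: only the most recent fence is kept alive. Each
iteration drops its reference to the previous chunk's fence before remembering
the new one, and the caller finally takes its own reference via
dma_fence_get(). A toy refcount model of that chaining; this is not dma_fence,
just the pattern:

#include <stdio.h>
#include <stdlib.h>

struct fence { int refs; int id; };

static struct fence *fence_get(struct fence *f)
{
	if (f)
		f->refs++;
	return f;
}

static void fence_put(struct fence *f)
{
	if (f && --f->refs == 0) {
		printf("fence %d released\n", f->id);
		free(f);
	}
}

static struct fence *submit_chunk(int id)
{
	struct fence *f = malloc(sizeof(*f));

	if (!f)
		abort();
	f->refs = 1;	/* the submitter's reference */
	f->id = id;
	return f;
}

int main(void)
{
	struct fence *fence = NULL, *out;
	int i;

	for (i = 0; i < 3; i++) {
		struct fence *next = submit_chunk(i);

		fence_put(fence);	/* drop the previous chunk's fence */
		fence = next;		/* remember only the latest one */
	}
	out = fence_get(fence);		/* the *f = dma_fence_get(fence) step */
	fence_put(fence);		/* the loop's own reference */
	printf("caller waits on fence %d\n", out->id);
	fence_put(out);
	return 0;
}
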
 /**
  * amdgpu_ttm_evict_resources - evict memory buffers
  * @adev: amdgpu device object