Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux...
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c7793db6d098a123fb70e313eec5c1e74a7982bf..a02992bff6afa3ec850b68c6003c34f679397b76 100644
@@ -32,6 +32,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
 #include "amdgpu_xgmi.h"
 
 #include <drm/drm_drv.h>
@@ -263,12 +264,14 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
  *
  * @adev: amdgpu device structure holding all necessary information
  * @mc: memory controller structure holding memory information
+ * @gart_placement: GART placement policy with respect to VRAM
  *
 * Function will try to place GART before or after VRAM.
 * If GART size is bigger than the space left then we adjust the GART size.
 * Thus this function will never fail.
  */
-void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+                             enum amdgpu_gart_placement gart_placement)
 {
        const uint64_t four_gb = 0x100000000ULL;
        u64 size_af, size_bf;
@@ -286,11 +289,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
                mc->gart_size = max(size_bf, size_af);
        }
 
-       if ((size_bf >= mc->gart_size && size_bf < size_af) ||
-           (size_af < mc->gart_size))
-               mc->gart_start = 0;
-       else
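+       /* Callers can force GART below or above VRAM; the default case keeps
+        * the historical best-fit placement.
+        */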
+       switch (gart_placement) {
+       case AMDGPU_GART_PLACEMENT_HIGH:
                mc->gart_start = max_mc_address - mc->gart_size + 1;
+               break;
+       case AMDGPU_GART_PLACEMENT_LOW:
+               mc->gart_start = 0;
+               break;
+       case AMDGPU_GART_PLACEMENT_BEST_FIT:
+       default:
+               if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+                   (size_af < mc->gart_size))
+                       mc->gart_start = 0;
+               else
+                       mc->gart_start = max_mc_address - mc->gart_size + 1;
+               break;
+       }
 
        mc->gart_start &= ~(four_gb - 1);
        mc->gart_end = mc->gart_start + mc->gart_size - 1;
@@ -315,14 +329,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
        const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
        u64 size_af, size_bf;
 
-       if (amdgpu_sriov_vf(adev)) {
-               mc->agp_start = 0xffffffffffff;
-               mc->agp_end = 0x0;
-               mc->agp_size = 0;
-
-               return;
-       }
-
        if (mc->fb_start > mc->gart_start) {
                size_bf = (mc->fb_start & sixteen_gb_mask) -
                        ALIGN(mc->gart_end + 1, sixteen_gb);
@@ -346,6 +352,25 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
                        mc->agp_size >> 20, mc->agp_start, mc->agp_end);
 }
 
+/**
+ * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * To disable the AGP aperture, you need to set the start to a larger
+ * value than the end.  This function sets the default value which
+ * can then be overridden using amdgpu_gmc_agp_location() if you want
+ * to enable the AGP aperture on a specific chip.
+ *
+ */
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+                               struct amdgpu_gmc *mc)
+{
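+       /* Start above end marks the aperture as disabled until a chip
+        * specific init path enables it via amdgpu_gmc_agp_location().
+        */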
+       mc->agp_start = 0xffffffffffff;
+       mc->agp_end = 0;
+       mc->agp_size = 0;
+}
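+
+/*
+ * Illustrative sketch of a caller in a GMC IP block's mc_init path (not part
+ * of this hunk; "base" stands for the chip's FB base address):
+ *
+ *     amdgpu_gmc_set_agp_default(adev, mc);
+ *     amdgpu_gmc_vram_location(adev, mc, base);
+ *     amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ *     if (!amdgpu_sriov_vf(adev))
+ *             amdgpu_gmc_agp_location(adev, mc);
+ */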
+
 /**
 * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
  *
@@ -452,7 +477,10 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
        uint32_t hash;
        uint64_t tmp;
 
-       ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1;
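+       /* Nothing to do here when the hardware retry CAM is enabled. */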
+       if (adev->irq.retry_cam_enabled)
+               return;
+
+       ih = &adev->irq.ih1;
        /* Get the WPTR of the last entry in IH ring */
        last_wptr = amdgpu_ih_get_wptr(adev, ih);
        /* Order wptr with ring data. */
@@ -579,6 +607,142 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
        return 0;
 }
 
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+                             uint32_t vmhub, uint32_t flush_type)
+{
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+       struct dma_fence *fence;
+       struct amdgpu_job *job;
+       int r;
+
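+       /* Use the register based flush unless the SDMA invalidation
+        * workaround applies and the SDMA is usable for it (GART only,
+        * i.e. vmid 0, buffer funcs ready and no reset in progress).
+        */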
+       if (!hub->sdma_invalidation_workaround || vmid ||
+           !adev->mman.buffer_funcs_enabled ||
+           !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
+           !ring->sched.ready) {
+
+               /*
+                * A GPU reset should flush all TLBs anyway, so no need to do
+                * this while one is ongoing.
+                */
+               if (!down_read_trylock(&adev->reset_domain->sem))
+                       return;
+
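+               /* Some ASICs need an additional flush type to fully
+                * invalidate the TLBs, hence the extra_type_* flags.
+                */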
+               if (adev->gmc.flush_tlb_needs_extra_type_2)
+                       adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+                                                          vmhub, 2);
+
+               if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+                       adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+                                                          vmhub, 0);
+
+               adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+                                                  flush_type);
+               up_read(&adev->reset_domain->sem);
+               return;
+       }
+
+       /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+        * corruption if an invalidation happens at the same time as a VA
+        * translation. Avoid this by doing the invalidation from the SDMA
+        * itself at least for GART.
+        */
+       mutex_lock(&adev->mman.gtt_window_lock);
+       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED,
+                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+                                    &job);
+       if (r)
+               goto error_alloc;
+
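+       /* The IB only contains a NOP; the actual invalidation comes from the
+        * VM flush emitted when the job runs, since vm_needs_flush is set and
+        * vm_pd_addr points at the GART page directory.
+        */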
+       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+       job->vm_needs_flush = true;
+       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       fence = amdgpu_job_submit(job);
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       dma_fence_wait(fence, false);
+       dma_fence_put(fence);
+
+       return;
+
+error_alloc:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+       dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+                                  uint32_t flush_type, bool all_hub,
+                                  uint32_t inst)
+{
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
+               adev->usec_timeout;
+       struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+       unsigned int ndw;
+       signed long r;
+       uint32_t seq;
+
+       if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
+           !down_read_trylock(&adev->reset_domain->sem)) {
+
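+               /* KIQ not required, not ready, or a reset is in progress;
+                * flush through the GMC callbacks directly.
+                */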
+               if (adev->gmc.flush_tlb_needs_extra_type_2)
+                       adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+                                                                2, all_hub,
+                                                                inst);
+
+               if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+                       adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+                                                                0, all_hub,
+                                                                inst);
+
+               adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+                                                        flush_type, all_hub,
+                                                        inst);
+               return 0;
+       }
+
+       /* 2 dwords flush + 8 dwords fence */
+       ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+       if (adev->gmc.flush_tlb_needs_extra_type_2)
+               ndw += kiq->pmf->invalidate_tlbs_size;
+
+       if (adev->gmc.flush_tlb_needs_extra_type_0)
+               ndw += kiq->pmf->invalidate_tlbs_size;
+
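+       /* Emit the invalidation(s) on the KIQ ring and wait for completion
+        * by polling the fence, so no fence interrupts are required.
+        */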
+       spin_lock(&adev->gfx.kiq[inst].ring_lock);
+       amdgpu_ring_alloc(ring, ndw);
+       if (adev->gmc.flush_tlb_needs_extra_type_2)
+               kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+       if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+               kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+       kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+       r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+       if (r) {
+               amdgpu_ring_undo(ring);
+               spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+               goto error_unlock_reset;
+       }
+
+       amdgpu_ring_commit(ring);
+       spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+       r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+       if (r < 1) {
+               dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+               r = -ETIME;
+               goto error_unlock_reset;
+       }
+       r = 0;
+
+error_unlock_reset:
+       up_read(&adev->reset_domain->sem);
+       return r;
+}
+
 /**
  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
  * @adev: amdgpu_device pointer