drm/amdgpu: fix and cleanup gmc_v9_0_flush_gpu_tlb_pasid
author Christian König <christian.koenig@amd.com>
Mon, 4 Sep 2023 11:03:23 +0000 (13:03 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 26 Sep 2023 20:55:09 +0000 (16:55 -0400)
Testing for reset is pointless since the reset can start right after the
test.
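
To illustrate the race (a sketch, not part of the patch; issue_flush() is a
hypothetical stand-in for the actual flush work): checking amdgpu_in_reset()
and then flushing leaves a window in which a reset can begin, while holding
the reset domain's read lock across the flush closes it:

    /* racy: a reset can begin right after the check */
    if (amdgpu_in_reset(adev))
            return -EIO;
    issue_flush(adev);

    /* instead: hold the reset semaphore across the flush, or fall back */
    if (down_read_trylock(&adev->reset_domain->sem)) {
            issue_flush(adev);
            up_read(&adev->reset_domain->sem);
    }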

The same PASID can be used by more than one VMID; invalidate each of them.
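
A sketch of the difference (pasid_matches() and flush_vmid() are hypothetical
shorthand for the ATC mapping query and the per-hub flush):

    /* before: stopped at the first matching VMID */
    for (vmid = 1; vmid < 16; vmid++) {
            if (pasid_matches(adev, vmid, pasid)) {
                    flush_vmid(adev, vmid);
                    break;
            }
    }

    /* after: flush every VMID the PASID is mapped to */
    for (vmid = 1; vmid < 16; vmid++) {
            if (pasid_matches(adev, vmid, pasid))
                    flush_vmid(adev, vmid);
    }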

Move the KIQ and all the workaround handling into common GMC code.
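
The flush_gpu_tlb_pasid macro in amdgpu_gmc.h is replaced by a function with
the same name and argument order, so call sites do not change; a hypothetical
caller looks like:

    /* e.g. flush type 0 on all hubs for this PASID, KIQ instance 0 */
    int r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, 0, true, 0);

    if (r)
            dev_err(adev->dev, "PASID TLB flush failed (%d)\n", r);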

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 6530f5d2b35b59669eff31391196bc7f72065c69..8614ed249bb415ab77b08ce72f6cc8148275aad2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -32,6 +32,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
 #include "amdgpu_xgmi.h"
 
 #include <drm/drm_drv.h>
@@ -630,6 +631,65 @@ error_alloc:
        dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
 }
 
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+                                  uint32_t flush_type, bool all_hub,
+                                  uint32_t inst)
+{
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
+               adev->usec_timeout;
+       struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+       unsigned int ndw;
+       signed long r;
+       uint32_t seq;
+
+       if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
+           !down_read_trylock(&adev->reset_domain->sem)) {
+               return adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+                                                               flush_type,
+                                                               all_hub, inst);
+       }
+
+       /* 2 dwords flush + 8 dwords fence */
+       ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+       if (adev->gmc.flush_tlb_needs_extra_type_2)
+               ndw += kiq->pmf->invalidate_tlbs_size;
+
+       if (adev->gmc.flush_tlb_needs_extra_type_0)
+               ndw += kiq->pmf->invalidate_tlbs_size;
+
+       spin_lock(&adev->gfx.kiq[inst].ring_lock);
+       amdgpu_ring_alloc(ring, ndw);
+       if (adev->gmc.flush_tlb_needs_extra_type_2)
+               kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+       if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+               kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+       kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+       r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+       if (r) {
+               amdgpu_ring_undo(ring);
+               spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+               goto error_unlock_reset;
+       }
+
+       amdgpu_ring_commit(ring);
+       spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+       r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
+       if (r < 1) {
+               dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+               r = -ETIME;
+               goto error_unlock_reset;
+       }
+       r = 0;
+
+error_unlock_reset:
+       up_read(&adev->reset_domain->sem);
+       return r;
+}
+
 /**
  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 9e7df2f69123cd77543d0555baeac1cb84cb71ae..7732d4ef845eace751d3639916e6462811392985 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -335,11 +335,12 @@ struct amdgpu_gmc {
        u64 MC_VM_MX_L1_TLB_CNTL;
 
        u64 noretry_flags;
+
+       bool flush_tlb_needs_extra_type_0;
+       bool flush_tlb_needs_extra_type_2;
+       bool flush_pasid_uses_kiq;
 };
 
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
-       ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
-       ((adev), (pasid), (type), (allhub), (inst)))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
@@ -404,6 +405,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
 void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
                              uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+                                  uint32_t flush_type, bool all_hub,
+                                  uint32_t inst);
 
 extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
 extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f7d0545598bdc18ba3e0f2dbdc492efad3f0db6d..95a60f6cd35956f0991f7830056502bfbc54c5ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -956,89 +956,30 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
                                        uint16_t pasid, uint32_t flush_type,
                                        bool all_hub, uint32_t inst)
 {
-       int vmid, i;
-       signed long r;
-       uint32_t seq;
-       uint16_t queried_pasid;
-       bool ret;
-       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
-       struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
-
-       if (amdgpu_in_reset(adev))
-               return -EIO;
-
-       if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
-               /* Vega20+XGMI caches PTEs in TC and TLB. Add a
-                * heavy-weight TLB flush (type 2), which flushes
-                * both. Due to a race condition with concurrent
-                * memory accesses using the same TLB cache line, we
-                * still need a second TLB flush after this.
-                */
-               bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
-                                      amdgpu_ip_version(adev, GC_HWIP, 0) ==
-                                              IP_VERSION(9, 4, 0));
-               /* 2 dwords flush + 8 dwords fence */
-               unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
-               if (vega20_xgmi_wa)
-                       ndw += kiq->pmf->invalidate_tlbs_size;
-
-               spin_lock(&adev->gfx.kiq[inst].ring_lock);
-               /* 2 dwords flush + 8 dwords fence */
-               amdgpu_ring_alloc(ring, ndw);
-               if (vega20_xgmi_wa)
-                       kiq->pmf->kiq_invalidate_tlbs(ring,
-                                                     pasid, 2, all_hub);
-
-               if (flush_type == 2 &&
-                   amdgpu_ip_version(adev, GC_HWIP, 0) ==
-                           IP_VERSION(9, 4, 3) &&
-                   adev->rev_id == 0)
-                       kiq->pmf->kiq_invalidate_tlbs(ring,
-                                               pasid, 0, all_hub);
-
-               kiq->pmf->kiq_invalidate_tlbs(ring,
-                                       pasid, flush_type, all_hub);
-               r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-               if (r) {
-                       amdgpu_ring_undo(ring);
-                       spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-                       up_read(&adev->reset_domain->sem);
-                       return -ETIME;
-               }
-
-               amdgpu_ring_commit(ring);
-               spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-               if (r < 1) {
-                       dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-                       up_read(&adev->reset_domain->sem);
-                       return -ETIME;
-               }
-               up_read(&adev->reset_domain->sem);
-               return 0;
-       }
+       uint16_t queried;
+       int i, vmid;
 
        for (vmid = 1; vmid < 16; vmid++) {
+               bool valid;
 
-               ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-                               &queried_pasid);
-               if (ret && queried_pasid == pasid) {
-                       if (all_hub) {
-                               for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-                                       gmc_v9_0_flush_gpu_tlb(adev, vmid,
-                                                       i, flush_type);
-                       } else {
-                               gmc_v9_0_flush_gpu_tlb(adev, vmid,
-                                               AMDGPU_GFXHUB(0), flush_type);
-                       }
-                       break;
+               valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+                                                                &queried);
+               if (!valid || queried != pasid)
+                       continue;
+
+               if (all_hub) {
+                       for_each_set_bit(i, adev->vmhubs_mask,
+                                        AMDGPU_MAX_VMHUBS)
+                               gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+                                                      flush_type);
+               } else {
+                       gmc_v9_0_flush_gpu_tlb(adev, vmid,
+                                              AMDGPU_GFXHUB(0),
+                                              flush_type);
                }
        }
 
        return 0;
-
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -2362,6 +2303,24 @@ static int gmc_v9_0_hw_init(void *handle)
        bool value;
        int i, r;
 
+       adev->gmc.flush_pasid_uses_kiq = true;
+
+       /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+        * (type 2), which flushes both. Due to a race condition with
+        * concurrent memory accesses using the same TLB cache line, we still
+        * need a second TLB flush after this.
+        */
+       adev->gmc.flush_tlb_needs_extra_type_2 =
+               amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+               adev->gmc.xgmi.num_physical_nodes;
+       /*
+        * TODO: This workaround is badly documented and had a buggy
+        * implementation. We should probably verify what we do here.
+        */
+       adev->gmc.flush_tlb_needs_extra_type_0 =
+               amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+               adev->rev_id == 0;
+
        /* The sequence of these two function calls matters.*/
        gmc_v9_0_init_golden_registers(adev);