Merge tag 'drm-misc-next-2023-01-19' of git://anongit.freedesktop.org/drm/drm-misc...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
index 09042486e66e29dac845b73c95681c53cd978cd7..76a2b4a4de10e57091627439ff315e2d468b5aca 100644 (file)
@@ -36,6 +36,7 @@
 #include <generated/utsrelease.h>
 #include <linux/pci-p2pdma.h>
 
+#include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -91,6 +92,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
 #define AMDGPU_MAX_RETRY_LIMIT         2
 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
 
+static const struct drm_driver amdgpu_kms_driver;
+
 const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
@@ -925,32 +928,33 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ * amdgpu_device_mem_scratch_init - allocate the scratch page in VRAM or GTT
  *
  * @adev: amdgpu_device pointer
  *
  * Allocates a scratch page of VRAM for use by various things in the
  * driver.
  */
-static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
 {
-       return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
-                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
-                                      &adev->vram_scratch.robj,
-                                      &adev->vram_scratch.gpu_addr,
-                                      (void **)&adev->vram_scratch.ptr);
+       return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+                                      AMDGPU_GEM_DOMAIN_VRAM |
+                                      AMDGPU_GEM_DOMAIN_GTT,
+                                      &adev->mem_scratch.robj,
+                                      &adev->mem_scratch.gpu_addr,
+                                      (void **)&adev->mem_scratch.ptr);
 }
 
 /**
- * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ * amdgpu_device_mem_scratch_fini - Free the memory scratch page
  *
  * @adev: amdgpu_device pointer
  *
  * Frees the VRAM scratch page.
  */
-static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
 {
-       amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
+       amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
 }
 
 /**
@@ -1982,17 +1986,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
        }
 
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
-       err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
-       if (err) {
-               dev_err(adev->dev,
-                       "Failed to load gpu_info firmware \"%s\"\n",
-                       fw_name);
-               goto out;
-       }
-       err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
+       err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
        if (err) {
                dev_err(adev->dev,
-                       "Failed to validate gpu_info firmware \"%s\"\n",
+                       "Failed to get gpu_info firmware \"%s\"\n",
                        fw_name);
                goto out;
        }
@@ -2391,9 +2388,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                        if (amdgpu_sriov_vf(adev))
                                amdgpu_virt_exchange_data(adev);
 
-                       r = amdgpu_device_vram_scratch_init(adev);
+                       r = amdgpu_device_mem_scratch_init(adev);
                        if (r) {
-                               DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
+                               DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
                                goto init_failed;
                        }
                        r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
@@ -2411,8 +2408,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                        /* right after GMC hw init, we create CSA */
                        if (amdgpu_mcbp) {
                                r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
-                                                               AMDGPU_GEM_DOMAIN_VRAM,
-                                                               AMDGPU_CSA_SIZE);
+                                                              AMDGPU_GEM_DOMAIN_VRAM |
+                                                              AMDGPU_GEM_DOMAIN_GTT,
+                                                              AMDGPU_CSA_SIZE);
                                if (r) {
                                        DRM_ERROR("allocate CSA failed %d\n", r);
                                        goto init_failed;
@@ -2474,6 +2472,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                        if (!amdgpu_sriov_vf(adev)) {
                                struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
+                               if (WARN_ON(!hive)) {
+                                       r = -ENOENT;
+                                       goto init_failed;
+                               }
+
                                if (!hive->reset_domain ||
                                    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
                                        r = -ENOENT;
@@ -2577,9 +2580,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
-               /* skip CG for GFX on S0ix */
+               /* skip CG for GFX, SDMA on S0ix */
                if (adev->in_s0ix &&
-                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
                        continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -2613,9 +2617,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
-               /* skip PG for GFX on S0ix */
+               /* skip PG for GFX, SDMA on S0ix */
                if (adev->in_s0ix &&
-                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
                        continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -2867,7 +2872,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
                        amdgpu_ucode_free_bo(adev);
                        amdgpu_free_static_csa(&adev->virt.csa_obj);
                        amdgpu_device_wb_fini(adev);
-                       amdgpu_device_vram_scratch_fini(adev);
+                       amdgpu_device_mem_scratch_fini(adev);
                        amdgpu_ib_pool_fini(adev);
                }
 
@@ -3012,14 +3017,21 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                        continue;
                }
 
-               /* skip suspend of gfx and psp for S0ix
+               /* skip suspend of gfx/mes and psp for S0ix
                 * gfx is in gfxoff state, so on resume it will exit gfxoff just
                 * like at runtime. PSP is also part of the always on hardware
                 * so no need to suspend it.
                 */
                if (adev->in_s0ix &&
                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
-                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
+                       continue;
+
+               /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+               if (adev->in_s0ix &&
+                   (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
                        continue;
 
                /* XXX handle errors */
@@ -3222,15 +3234,6 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
                        return r;
                }
                adev->ip_blocks[i].status.hw = true;
-
-               if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
-                       /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
-                        * amdgpu_device_resume() after IP resume.
-                        */
-                       amdgpu_gfx_off_ctrl(adev, false);
-                       DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
-               }
-
        }
 
        return 0;
@@ -3682,6 +3685,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        if (r)
                return r;
 
+       /* Get rid of things like offb */
+       r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
+       if (r)
+               return r;
+
        /* Enable TMZ based on IP_VERSION */
        amdgpu_gmc_tmz_set(adev);
 
@@ -4017,8 +4025,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
        amdgpu_fence_driver_sw_fini(adev);
        amdgpu_device_ip_fini(adev);
-       release_firmware(adev->firmware.gpu_info_fw);
-       adev->firmware.gpu_info_fw = NULL;
+       amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
        adev->accel_working = false;
        dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
 
@@ -4106,6 +4113,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        adev->in_suspend = true;
 
+       /* Evict the majority of BOs before requesting full GPU access */
+       r = amdgpu_device_evict_resources(adev);
+       if (r)
+               return r;
+
        if (amdgpu_sriov_vf(adev)) {
                amdgpu_virt_fini_data_exchange(adev);
                r = amdgpu_virt_request_full_gpu(adev, false);
@@ -4180,21 +4192,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 
        r = amdgpu_device_ip_resume(adev);
 
-       /* no matter what r is, always need to properly release full GPU */
-       if (amdgpu_sriov_vf(adev)) {
-               amdgpu_virt_init_data_exchange(adev);
-               amdgpu_virt_release_full_gpu(adev, true);
-       }
-
        if (r) {
                dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
-               return r;
+               goto exit;
        }
        amdgpu_fence_driver_hw_init(adev);
 
        r = amdgpu_device_ip_late_init(adev);
        if (r)
-               return r;
+               goto exit;
 
        queue_delayed_work(system_wq, &adev->delayed_init_work,
                           msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -4202,21 +4208,21 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
        if (!adev->in_s0ix) {
                r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
                if (r)
-                       return r;
+                       goto exit;
+       }
+
+exit:
+       if (amdgpu_sriov_vf(adev)) {
+               amdgpu_virt_init_data_exchange(adev);
+               amdgpu_virt_release_full_gpu(adev, true);
        }
 
+       if (r)
+               return r;
+
        /* Make sure IB tests flushed */
-       if (amdgpu_sriov_vf(adev))
-               amdgpu_irq_gpu_reset_resume_helper(adev);
        flush_delayed_work(&adev->delayed_init_work);
 
-       if (adev->in_s0ix) {
-               /* re-enable gfxoff after IP resume. This re-enables gfxoff after
-                * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
-                */
-               amdgpu_gfx_off_ctrl(adev, true);
-               DRM_DEBUG("will enable gfxoff for the mission mode\n");
-       }
        if (fbcon)
                drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
 
@@ -4597,11 +4603,6 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
        if (!amdgpu_ras_is_poison_mode_supported(adev))
                return true;
 
-       if (!amdgpu_device_ip_check_soft_reset(adev)) {
-               dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
-               return false;
-       }
-
        if (amdgpu_sriov_vf(adev))
                return true;
 
@@ -4726,7 +4727,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
                if (!need_full_reset)
                        need_full_reset = amdgpu_device_ip_need_full_reset(adev);
 
-               if (!need_full_reset && amdgpu_gpu_recovery) {
+               if (!need_full_reset && amdgpu_gpu_recovery &&
+                   amdgpu_device_ip_check_soft_reset(adev)) {
                        amdgpu_device_ip_pre_soft_reset(adev);
                        r = amdgpu_device_ip_soft_reset(adev);
                        amdgpu_device_ip_post_soft_reset(adev);
@@ -5044,6 +5046,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
                pm_runtime_enable(&(p->dev));
                pm_runtime_resume(&(p->dev));
        }
+
+       pci_dev_put(p);
 }
 
 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
@@ -5082,6 +5086,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 
                if (expires < ktime_get_mono_fast_ns()) {
                        dev_warn(adev->dev, "failed to suspend display audio\n");
+                       pci_dev_put(p);
                        /* TODO: abort the succeeding gpu reset? */
                        return -ETIMEDOUT;
                }
@@ -5089,6 +5094,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 
        pm_runtime_disable(&(p->dev));
 
+       pci_dev_put(p);
        return 0;
 }