drm/amdgpu: add ring timeout information in devcoredump
authorSunil Khatri <sunil.khatri@amd.com>
Fri, 1 Mar 2024 12:05:35 +0000 (17:35 +0530)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 6 Mar 2024 20:24:50 +0000 (15:24 -0500)
Add ring timeout related information in the amdgpu
devcoredump file for debugging purposes.

During the GPU recovery process, the registered callback
is triggered and adds the debug information to the data
file created by the devcoredump framework under the
directory /sys/class/devcoredump/devcdx/

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h

index a59364e9b6ed2c7028075be276fc41d0c9a06771..147100c27c2d412e73e4143d7a0310536b0880de 100644 (file)
@@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
                           coredump->reset_task_info.process_name,
                           coredump->reset_task_info.pid);
 
+       if (coredump->ring) {
+               drm_printf(&p, "\nRing timed out details\n");
+               drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+                          coredump->ring->funcs->type,
+                          coredump->ring->name);
+       }
+
        if (coredump->reset_vram_lost)
                drm_printf(&p, "VRAM is lost due to GPU reset!\n");
        if (coredump->adev->reset_info.num_regs) {
@@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
 {
        struct amdgpu_coredump_info *coredump;
        struct drm_device *dev = adev_to_drm(adev);
+       struct amdgpu_job *job = reset_context->job;
+       struct drm_sched_job *s_job;
 
        coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
 
@@ -241,6 +250,11 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
                }
        }
 
+       if (job) {
+               s_job = &job->base;
+               coredump->ring = to_amdgpu_ring(s_job->sched);
+       }
+
        coredump->adev = adev;
 
        ktime_get_ts64(&coredump->reset_time);
index 19899f6b9b2b419a0fdf2ed84c71f0278963f511..60522963aaca1a04e0b6c2fb53ee4f0d0ea06d19 100644 (file)
@@ -97,6 +97,7 @@ struct amdgpu_coredump_info {
        struct amdgpu_task_info         reset_task_info;
        struct timespec64               reset_time;
        bool                            reset_vram_lost;
+       struct amdgpu_ring                      *ring;
 };
 #endif