Re: [PATCH v1 3/6] drm/amdgpu: save ring content before resetting the device

From: Alex Deucher

Date: Tue Feb 17 2026 - 11:21:09 EST


On Wed, Feb 11, 2026 at 5:29 AM Pierre-Eric Pelloux-Prayer
<pierre-eric.pelloux-prayer@xxxxxxx> wrote:
>
> Otherwise the ring content might no longer be relevant, since
> the device reset can clobber the ring buffers before they are read.
>
> When a coredump is generated the rings with outstanding fences
> are saved and then printed to the final devcoredump from the
> worker thread.
> Since this requires memory allocation, the ring capture might
> be missing from the generated devcoredump if the allocation fails.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>

Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>

> ---
> .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 85 +++++++++++++++----
> .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 13 ++-
> 2 files changed, 81 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> index 0808ca98ccd9..0bf85ab43204 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
> @@ -204,7 +204,9 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
> struct drm_print_iterator iter;
> struct amdgpu_vm_fault_info *fault_info;
> struct amdgpu_ip_block *ip_block;
> - int ver;
> + struct amdgpu_ring *ring;
> + int ver, i, j;
> + u32 ring_idx, off;
>
> iter.data = buffer;
> iter.offset = 0;
> @@ -294,23 +296,25 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
>
> /* Add ring buffer information */
> drm_printf(&p, "Ring buffer information\n");
> - for (int i = 0; i < coredump->adev->num_rings; i++) {
> - int j = 0;
> - struct amdgpu_ring *ring = coredump->adev->rings[i];
> + if (coredump->num_rings) {
> + for (i = 0; i < coredump->num_rings; i++) {
> + ring_idx = coredump->rings[i].ring_index;
> + ring = coredump->adev->rings[ring_idx];
> + off = coredump->rings[i].offset;
>
> - drm_printf(&p, "ring name: %s\n", ring->name);
> - drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
> - amdgpu_ring_get_rptr(ring),
> - amdgpu_ring_get_wptr(ring),
> - ring->buf_mask);
> - drm_printf(&p, "Ring size in dwords: %d\n",
> - ring->ring_size / 4);
> - drm_printf(&p, "Ring contents\n");
> - drm_printf(&p, "Offset \t Value\n");
> + drm_printf(&p, "ring name: %s\n", ring->name);
> + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
> + coredump->rings[i].rptr,
> + coredump->rings[i].wptr,
> + ring->buf_mask);
> + drm_printf(&p, "Ring size in dwords: %d\n",
> + ring->ring_size / 4);
> + drm_printf(&p, "Ring contents\n");
> + drm_printf(&p, "Offset \t Value\n");
>
> - while (j < ring->ring_size) {
> - drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
> - j += 4;
> + for (j = 0; j < ring->ring_size; j += 4)
> + drm_printf(&p, "0x%x \t 0x%x\n", j,
> + coredump->rings_dw[off + j / 4]);
> }
> }
>
> @@ -354,6 +358,8 @@ static void amdgpu_devcoredump_free(void *data)
> cancel_work_sync(&coredump->work);
> coredump->adev->coredump_in_progress = false;
> kfree(coredump->formatted);
> + kfree(coredump->rings);
> + kfree(coredump->rings_dw);
> kfree(data);
> }
>
> @@ -382,6 +388,12 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
> struct drm_device *dev = adev_to_drm(adev);
> struct amdgpu_coredump_info *coredump;
> struct drm_sched_job *s_job;
> + u64 total_ring_size, ring_count;
> + struct amdgpu_ring *ring;
> + int i, off, idx;
> +
> + if (adev->coredump_in_progress)
> + return;
>
> if (adev->coredump_in_progress)
> return;
> @@ -410,6 +422,47 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
> coredump->ring = to_amdgpu_ring(s_job->sched);
> }
>
> + /* Dump ring content if memory allocation succeeds. */
> + ring_count = 0;
> + total_ring_size = 0;
> + for (i = 0; i < adev->num_rings; i++) {
> + ring = adev->rings[i];
> +
> + /* Only dump rings with unsignalled fences. */
> + if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
> + coredump->ring != ring)
> + continue;
> +
> + total_ring_size += ring->ring_size;
> + ring_count++;
> + }
> + coredump->rings_dw = kzalloc(total_ring_size, GFP_NOWAIT);
> + coredump->rings = kcalloc(ring_count, sizeof(struct amdgpu_coredump_ring), GFP_NOWAIT);
> + if (coredump->rings && coredump->rings_dw) {
> + for (i = 0, off = 0, idx = 0; i < adev->num_rings; i++) {
> + ring = adev->rings[i];
> +
> + if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
> + coredump->ring != ring)
> + continue;
> +
> + coredump->rings[idx].ring_index = ring->idx;
> + coredump->rings[idx].rptr = amdgpu_ring_get_rptr(ring);
> + coredump->rings[idx].wptr = amdgpu_ring_get_wptr(ring);
> + coredump->rings[idx].offset = off;
> +
> + memcpy(&coredump->rings_dw[off], ring->ring, ring->ring_size);
> + off += ring->ring_size;
> + idx++;
> + }
> + coredump->num_rings = idx;
> + } else {
> + kfree(coredump->rings_dw);
> + kfree(coredump->rings);
> + coredump->rings_dw = NULL;
> + coredump->rings = NULL;
> + }
> +
> coredump->adev = adev;
>
> ktime_get_ts64(&coredump->reset_time);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> index 4c37a852b74a..1c3d22356cc7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
> @@ -31,6 +31,13 @@
>
> #define AMDGPU_COREDUMP_VERSION "1"
>
> +struct amdgpu_coredump_ring {
> + u64 rptr;
> + u64 wptr;
> + u32 ring_index;
> + u32 offset;
> +};
> +
> struct amdgpu_coredump_info {
> struct amdgpu_device *adev;
> struct amdgpu_task_info reset_task_info;
> @@ -41,12 +48,16 @@ struct amdgpu_coredump_info {
> bool skip_vram_check;
> bool reset_vram_lost;
> struct amdgpu_ring *ring;
> +
> + struct amdgpu_coredump_ring *rings;
> + u32 *rings_dw;
> + u32 num_rings;
> +
> /* Readable form of coredevdump, generate once to speed up
> * reading it (see drm_coredump_printer's documentation).
> */
> ssize_t formatted_size;
> char *formatted;
> -
> };
> #endif
>
> --
> 2.43.0
>