[PATCH v2 2/6] drm/amdgpu: move devcoredump generation to a worker

From: Pierre-Eric Pelloux-Prayer

Date: Thu Feb 26 2026 - 04:54:59 EST


Update the way drm_coredump_printer is used based on its documentation
and Xe's code: the main idea is to generate the final version in one go
and then use memcpy to return the chunks requested by the caller of
amdgpu_devcoredump_read.

The generation is moved to a separate worker thread.

This cuts the time to copy the dump from 40s to ~0s on my machine.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++
.../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 66 +++++++++++++++++--
.../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 9 +++
3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 057c8bd2ad89..ae81a428cfb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1200,6 +1200,11 @@ struct amdgpu_device {

struct amdgpu_reset_domain *reset_domain;

+#ifdef CONFIG_DEV_COREDUMP
+ /* If a coredump state capture is in progress, don't start a new one. */
+ bool coredump_in_progress;
+#endif
+
struct mutex benchmark_mutex;

bool scpm_enabled;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 42a969512dcc..0808ca98ccd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
}
#else

+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
const char *hw_ip_names[MAX_HWIP] = {
[GC_HWIP] = "GC",
[HDP_HWIP] = "HDP",
@@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
}

static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
- void *data, size_t datalen)
+amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
{
struct drm_printer p;
- struct amdgpu_coredump_info *coredump = data;
struct drm_print_iterator iter;
struct amdgpu_vm_fault_info *fault_info;
struct amdgpu_ip_block *ip_block;
@@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,

iter.data = buffer;
iter.offset = 0;
- iter.start = offset;
iter.remain = count;

p = drm_coredump_printer(&iter);
@@ -323,11 +322,60 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
return count - iter.remain;
}

+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct amdgpu_coredump_info *coredump = data;
+ ssize_t byte_copied;
+
+ if (!coredump)
+ return -ENODEV;
+
+ flush_work(&coredump->work);
+
+ if (!coredump->formatted)
+ return -ENODEV;
+
+ if (offset >= coredump->formatted_size)
+ return 0;
+
+ byte_copied = count < coredump->formatted_size - offset ? count :
+ coredump->formatted_size - offset;
+ memcpy(buffer, coredump->formatted + offset, byte_copied);
+
+ return byte_copied;
+}
+
static void amdgpu_devcoredump_free(void *data)
{
+ struct amdgpu_coredump_info *coredump = data;
+
+ cancel_work_sync(&coredump->work);
+ coredump->adev->coredump_in_progress = false;
+ kvfree(coredump->formatted);
kfree(data);
}

+static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
+{
+ struct amdgpu_coredump_info *coredump = container_of(work, typeof(*coredump), work);
+
+ dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+ /* Do a one-time preparation of the coredump output because
+ * repeatedly calling drm_coredump_printer is very slow.
+ */
+ coredump->formatted_size =
+ amdgpu_devcoredump_format(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
+ coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
+ if (!coredump->formatted)
+ return;
+ amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump);
+ coredump->adev->coredump_in_progress = false;
+}
+
void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
bool vram_lost, struct amdgpu_job *job)
{
@@ -335,10 +383,15 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
struct amdgpu_coredump_info *coredump;
struct drm_sched_job *s_job;

+ if (adev->coredump_in_progress)
+ return;
+
coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
if (!coredump)
return;

+ adev->coredump_in_progress = true;
+
coredump->skip_vram_check = skip_vram_check;
coredump->reset_vram_lost = vram_lost;

@@ -361,8 +414,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,

ktime_get_ts64(&coredump->reset_time);

- dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+ /* Kick off coredump formatting to a worker thread. */
+ INIT_WORK(&coredump->work, amdgpu_devcoredump_deferred_work);
+ queue_work(system_unbound_wq, &coredump->work);

drm_info(dev, "AMDGPU device coredump file has been created\n");
drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..4c37a852b74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -35,9 +35,18 @@ struct amdgpu_coredump_info {
struct amdgpu_device *adev;
struct amdgpu_task_info reset_task_info;
struct timespec64 reset_time;
+
+ struct work_struct work;
+
bool skip_vram_check;
bool reset_vram_lost;
struct amdgpu_ring *ring;
+ /* Readable form of the devcoredump, generated once to speed up
+ * reading it (see drm_coredump_printer's documentation).
+ */
+ ssize_t formatted_size;
+ char *formatted;
+
};
#endif

--
2.43.0