[PATCH V3] accel/amdxdna: Prevent PM resume deadlock in hwctx_sync_debug_bo()

From: Lizhi Hou

Date: Tue Jun 16 2026 - 17:24:47 EST


amdxdna_hwctx_sync_debug_bo() invokes the hardware hwctx_sync_debug_bo()
callback while holding xdna->dev_lock.

The callback may call amdxdna_cmd_submit(), which in turn calls
amdxdna_pm_resume_get(). If the device is suspended,
amdxdna_pm_resume_get() may synchronously execute amdxdna_pm_resume(),
which also acquires xdna->dev_lock, resulting in a deadlock.

Avoid the deadlock by calling amdxdna_pm_resume_get() before acquiring
xdna->dev_lock in both amdxdna_hwctx_sync_debug_bo() and
amdxdna_drm_config_hwctx_ioctl(), and by calling amdxdna_pm_suspend_put()
only after the lock has been released.

Fixes: 7ea046838021 ("accel/amdxdna: Support firmware debug buffer")
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
V2:
Use explicit mutex_lock()/mutex_unlock() instead of guard(mutex) so that
the new error-path goto does not jump forward over the guard declaration.
V3:
Apply the same fix in amdxdna_drm_config_hwctx_ioctl().

drivers/accel/amdxdna/amdxdna_ctx.c | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index ff6c3e8e5a15..2ab4443e7094 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -356,16 +356,25 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
return -EINVAL;
}

- guard(mutex)(&xdna->dev_lock);
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed, ret %d", ret);
+ goto free_buf;
+ }
+
+ mutex_lock(&xdna->dev_lock);
hwctx = xa_load(&client->hwctx_xa, args->handle);
if (!hwctx) {
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
ret = -EINVAL;
- goto free_buf;
+ goto unlock;
}

ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);

+unlock:
+ mutex_unlock(&xdna->dev_lock);
+ amdxdna_pm_suspend_put(xdna);
free_buf:
kfree(buf);
return ret;
@@ -386,16 +395,25 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
if (!gobj)
return -EINVAL;

+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed, ret %d", ret);
+ goto put_obj;
+ }
+
abo = to_xdna_obj(gobj);
- guard(mutex)(&xdna->dev_lock);
+ mutex_lock(&xdna->dev_lock);
hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
if (!hwctx) {
ret = -EINVAL;
- goto put_obj;
+ goto unlock;
}

ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);

+unlock:
+ mutex_unlock(&xdna->dev_lock);
+ amdxdna_pm_suspend_put(xdna);
put_obj:
drm_gem_object_put(gobj);
return ret;
--
2.34.1