Re: [PATCH V1] accel/amdxdna: Fix NULL pointer dereference of mgmt_chann
From: Lizhi Hou
Date: Fri Feb 27 2026 - 22:16:48 EST
On 2/27/26 12:11, Mario Limonciello wrote:
On 2/26/26 3:38 PM, Lizhi Hou wrote:
mgmt_chann may be set to NULL if the firmware returns an unexpected
error in aie2_send_mgmt_msg_wait(). This can later lead to a NULL
pointer dereference in aie2_hw_stop().
Fix this by introducing a dedicated helper to destroy mgmt_chann
and by adding proper NULL checks before accessing it.
Is this actually going to fix it? It sounds like a concurrency problem, no?
It is not concurrency issue. The code is protected by dev_lock. That is why I added
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
in my fix.
This is actually error code path issue. Normally, xdna_send_msg_wait() will not time out. This only happens with broken firmware. That is why it was not found before.
Lizhi
Do you want a mutex in the helper perhaps?
Fixes: b87f920b9344 ("accel/amdxdna: Support hardware mailbox")
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
drivers/accel/amdxdna/aie2_message.c | 21 ++++++++++++++++-----
drivers/accel/amdxdna/aie2_pci.c | 7 ++-----
drivers/accel/amdxdna/aie2_pci.h | 1 +
3 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 277a27bce850..22e1a85a7ae0 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
return -ENODEV;
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
- if (ret == -ETIME) {
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
- ndev->mgmt_chann = NULL;
- }
+ if (ret == -ETIME)
+ aie2_destroy_mgmt_chann(ndev);
if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
@@ -914,6 +911,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
ndev->exec_msg_ops = &legacy_exec_message_ops;
}
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (!ndev->mgmt_chann)
+ return;
+
+ xdna_mailbox_stop_channel(ndev->mgmt_chann);
+ xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+}
+
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 85079b6fc5d9..977ce21eaf9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
aie2_mgmt_fw_fini(ndev);
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
- ndev->mgmt_chann = NULL;
+ aie2_destroy_mgmt_chann(ndev);
drmm_kfree(&xdna->ddev, ndev->mbox);
ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
@@ -441,8 +439,7 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
return 0;
destroy_mgmt_chann:
- xdna_mailbox_stop_channel(ndev->mgmt_chann);
- xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ aie2_destroy_mgmt_chann(ndev);
stop_psp:
aie2_psp_stop(ndev->psp_hdl);
fini_smu:
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index b20a3661078c..e72311c77996 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
/* aie2_message.c */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
+void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);