Re: [PATCH V1 4/6] accel/amdxdna: Add AIE4 firmware loading
From: Mario Limonciello
Date: Mon Mar 30 2026 - 16:26:39 EST
On 3/30/26 11:37, Lizhi Hou wrote:
From: David Zhang <yidong.zhang@xxxxxxx>
Add support for loading AIE4 firmware through the common PSP
interfaces.
Compared to AIE2, AIE4 introduces an additional CERT firmware image.
aiem_psp_create() performs CERT setup when the CERT image size is
non-zero.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
Signed-off-by: David Zhang <yidong.zhang@xxxxxxx>
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
drivers/accel/amdxdna/aie.h | 4 +
drivers/accel/amdxdna/aie2_pci.c | 2 +
drivers/accel/amdxdna/aie4_pci.c | 109 ++++++++++++++++++++++-
drivers/accel/amdxdna/aie4_pci.h | 4 +
drivers/accel/amdxdna/aie_psp.c | 141 +++++++++++++++++++++++-------
drivers/accel/amdxdna/npu3_regs.c | 23 +++++
6 files changed, 247 insertions(+), 36 deletions(-)
diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
index 124c0f7e9ca0..423ed34af9ee 100644
--- a/drivers/accel/amdxdna/aie.h
+++ b/drivers/accel/amdxdna/aie.h
@@ -57,7 +57,11 @@ struct aie_bar_off_pair {
struct psp_config {
const void *fw_buf;
u32 fw_size;
+ const void *certfw_buf;
+ u32 certfw_size;
void __iomem *psp_regs[PSP_MAX_REGS];
+ u32 arg2_mask;
+ u32 notify_val;
};
/* aie.c */
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index e4b7893bd429..0489e668cd73 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -549,6 +549,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
psp_conf.fw_size = fw->size;
psp_conf.fw_buf = fw->data;
+ psp_conf.arg2_mask = GENMASK(23, 0);
+ psp_conf.notify_val = 1;
for (i = 0; i < PSP_MAX_REGS; i++)
psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 0f360c1ccebd..e7993b315996 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -6,11 +6,15 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include <linux/firmware.h>
+#include <linux/sizes.h>
#include "aie4_pci.h"
#include "amdxdna_pci_drv.h"
-#define NO_IOHUB 0
+#define NO_IOHUB 0
+#define CERTFW_MAX_SIZE (SZ_32K + SZ_256)
+#define PSP_NOTIFY_INTR 0xD007BE11
/*
* The management mailbox channel is allocated by firmware.
@@ -207,13 +211,12 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
{
- /* TODO */
+ aie_psp_stop(ndev->aie.psp_hdl);
}
static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
{
- /* TODO */
- return 0;
+ return aie_psp_start(ndev->aie.psp_hdl);
}
static int aie4_hw_start(struct amdxdna_dev *xdna)
@@ -261,11 +264,98 @@ static void aie4_hw_stop(struct amdxdna_dev *xdna)
aie4_fw_unload(ndev);
}
+static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
+ const struct firmware **npufw,
+ const struct firmware **certfw)
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ char fw_name[128];
+ int ret;
+
+ ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
+ pdev->device, pdev->revision, ndev->priv->npufw_path);
+ if (ret >= sizeof(fw_name)) {
+ XDNA_ERR(xdna, "npu firmware path is truncated");
+ return -EINVAL;
+ }
+
+ ret = request_firmware(npufw, fw_name, &pdev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
+ return ret;
+ }
+
+ ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
+ pdev->device, pdev->revision, ndev->priv->certfw_path);
+ if (ret >= sizeof(fw_name)) {
+ XDNA_ERR(xdna, "cert firmware path is truncated");
+ ret = -EINVAL;
+ goto release_npufw;
+ }
+
+ ret = request_firmware(certfw, fw_name, &pdev->dev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
+ goto release_npufw;
+ }
+
+ if ((*certfw)->size > CERTFW_MAX_SIZE) {
+ XDNA_ERR(xdna, "CERTFW over maximum size of 32 KB + 256 B");
+ ret = -EINVAL;
+ goto release_certfw;
+ }
+
+ return 0;
+
+release_certfw:
+ release_firmware(*certfw);
+release_npufw:
+ release_firmware(*npufw);
+
+ return ret;
+}
+
+static void aie4_release_firmware(struct amdxdna_dev_hdl *ndev,
+ const struct firmware *npufw,
+ const struct firmware *certfw)
+{
+ release_firmware(certfw);
+ release_firmware(npufw);
+}
+
+static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
+ const struct firmware *npufw,
+ const struct firmware *certfw,
+ void __iomem *tbl[PCI_NUM_RESOURCES])
+{
+ struct amdxdna_dev *xdna = ndev->aie.xdna;
+ struct psp_config psp_conf;
+ int i;
+
+ psp_conf.fw_size = npufw->size;
+ psp_conf.fw_buf = npufw->data;
+ psp_conf.certfw_size = certfw->size;
+ psp_conf.certfw_buf = certfw->data;
+ psp_conf.arg2_mask = ~0;
+ psp_conf.notify_val = PSP_NOTIFY_INTR;
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
+ ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
+ if (!ndev->aie.psp_hdl) {
+ XDNA_ERR(xdna, "failed to create psp");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->aie.xdna;
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
+ const struct firmware *npufw, *certfw;
unsigned long bars = 0;
int ret, i;
@@ -282,6 +372,8 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
return ret;
}
+ for (i = 0; i < PSP_MAX_REGS; i++)
+ set_bit(PSP_REG_BAR(ndev, i), &bars);
set_bit(xdna->dev_info->mbox_bar, &bars);
set_bit(xdna->dev_info->sram_bar, &bars);
@@ -300,6 +392,15 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
pci_set_master(pdev);
+ ret = aie4_request_firmware(ndev, &npufw, &certfw);
+ if (ret)
+ goto clear_master;
+
+ ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
+ aie4_release_firmware(ndev, npufw, certfw);
+ if (ret)
+ goto clear_master;
+
ret = aie4_irq_init(xdna);
if (ret)
goto clear_master;
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index f3810a969431..ee388ccf7196 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -14,9 +14,13 @@
#include "amdxdna_mailbox.h"
struct amdxdna_dev_priv {
+ const char *npufw_path;
+ const char *certfw_path;
u32 mbox_bar;
u32 mbox_rbuf_bar;
u64 mbox_info_off;
+
+ struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
};
struct amdxdna_dev_hdl {
diff --git a/drivers/accel/amdxdna/aie_psp.c b/drivers/accel/amdxdna/aie_psp.c
index 8743b812a449..458dca7cc5a0 100644
--- a/drivers/accel/amdxdna/aie_psp.c
+++ b/drivers/accel/amdxdna/aie_psp.c
@@ -18,6 +18,7 @@
#define PSP_VALIDATE 1
#define PSP_START 2
#define PSP_RELEASE_TMR 3
+#define PSP_VALIDATE_CERT 4
/* PSP special arguments */
#define PSP_START_COPY_FW 1
@@ -27,10 +28,20 @@
#define PSP_ERROR_BAD_STATE 0xFFFF0007
#define PSP_FW_ALIGN 0x10000
+#define PSP_CFW_ALIGN 0x8000
#define PSP_POLL_INTERVAL 20000 /* us */
#define PSP_POLL_TIMEOUT 1000000 /* us */
-#define PSP_REG(p, reg) ((p)->psp_regs[reg])
+#define PSP_REG(p, reg) ((p)->conf.psp_regs[reg])
+#define PSP_SET_CMD(psp, reg_vals, cmd, arg0, arg1, arg2) \
+({ \
+ u32 *_regs = reg_vals; \
+ u32 _cmd = cmd; \
+ _regs[0] = _cmd; \
+ _regs[1] = arg0; \
+ _regs[2] = arg1; \
+ _regs[3] = ((arg2) | ((_cmd) << 24)) & (psp)->conf.arg2_mask; \
+})
struct psp_device {
struct drm_device *ddev;
@@ -38,7 +49,9 @@ struct psp_device {
u32 fw_buf_sz;
u64 fw_paddr;
void *fw_buffer;
- void __iomem *psp_regs[PSP_MAX_REGS];
+ u32 certfw_buf_sz;
+ u64 certfw_paddr;
+ void *certfw_buffer;
};
static int psp_exec(struct psp_device *psp, u32 *reg_vals)
@@ -47,13 +60,22 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals)
int ret, i;
u32 ready;
+ /* Check for PSP ready before any write */
+ ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
+ FIELD_GET(PSP_STATUS_READY, ready),
+ PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
+ if (ret) {
+ drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
+ return ret;
+ }
+
/* Write command and argument registers */
for (i = 0; i < PSP_NUM_IN_REGS; i++)
writel(reg_vals[i], PSP_REG(psp, i));
/* clear and set PSP INTR register to kick off */
writel(0, PSP_REG(psp, PSP_INTR_REG));
- writel(1, PSP_REG(psp, PSP_INTR_REG));
+ writel(psp->conf.notify_val, PSP_REG(psp, PSP_INTR_REG));
/* PSP should be busy. Wait for ready, so we know task is done. */
ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
@@ -90,69 +112,124 @@ int aie_psp_waitmode_poll(struct psp_device *psp)
void aie_psp_stop(struct psp_device *psp)
{
- u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
+ u32 reg_vals[PSP_NUM_IN_REGS];
int ret;
+ PSP_SET_CMD(psp, reg_vals, PSP_RELEASE_TMR, 0, 0, 0);
+
ret = psp_exec(psp, reg_vals);
if (ret)
drm_err(psp->ddev, "release tmr failed, ret %d", ret);
}
-int aie_psp_start(struct psp_device *psp)
+static int psp_validate_fw(struct psp_device *psp, u8 cmd, u64 paddr, u32 buf_sz)
{
u32 reg_vals[PSP_NUM_IN_REGS];
int ret;
- reg_vals[0] = PSP_VALIDATE;
- reg_vals[1] = lower_32_bits(psp->fw_paddr);
- reg_vals[2] = upper_32_bits(psp->fw_paddr);
- reg_vals[3] = psp->fw_buf_sz;
+ PSP_SET_CMD(psp, reg_vals, cmd, lower_32_bits(paddr),
+ upper_32_bits(paddr), buf_sz);
ret = psp_exec(psp, reg_vals);
- if (ret) {
+ if (ret)
drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
- return ret;
- }
- memset(reg_vals, 0, sizeof(reg_vals));
- reg_vals[0] = PSP_START;
- reg_vals[1] = PSP_START_COPY_FW;
+ return ret;
+}
+
+static int psp_start(struct psp_device *psp)
+{
+ u32 reg_vals[PSP_NUM_IN_REGS];
+ int ret;
+
+ PSP_SET_CMD(psp, reg_vals, PSP_START, PSP_START_COPY_FW, 0, 0);
+
ret = psp_exec(psp, reg_vals);
- if (ret) {
+ if (ret)
drm_err(psp->ddev, "failed to start fw, ret %d", ret);
+
+ return ret;
+}
+
+int aie_psp_start(struct psp_device *psp)
+{
+ int ret;
+
+ ret = psp_validate_fw(psp, PSP_VALIDATE,
+ psp->fw_paddr, psp->fw_buf_sz);
+ if (ret)
return ret;
- }
- return 0;
+ if (!psp->certfw_buf_sz)
+ goto psp_start;
+
+ ret = psp_validate_fw(psp, PSP_VALIDATE_CERT,
+ psp->certfw_paddr, psp->certfw_buf_sz);
+ if (ret)
+ return ret;
+psp_start:
+ return psp_start(psp);
+}
+
+/*
+ * PSP requires host physical address to load firmware.
+ * Allocate a buffer, obtain its physical address, align, and copy data in.
+ */
+static void *psp_alloc_fw_buf(struct psp_device *psp, const void *fw_data,
+ u32 fw_size, u32 align, u32 *buf_sz,
+ u64 *paddr)
+{
+ u32 alloc_sz;
+ void *buffer;
+ u64 offset;
+
+ *buf_sz = ALIGN(fw_size, align);
+ alloc_sz = *buf_sz + align;
+
+ buffer = drmm_kmalloc(psp->ddev, alloc_sz, GFP_KERNEL);
+ if (!buffer)
+ return NULL;
+
+ *paddr = virt_to_phys(buffer);
+ offset = ALIGN(*paddr, align) - *paddr;
+ *paddr += offset;
+ memcpy(buffer + offset, fw_data, fw_size);
+
+ return buffer;
}
struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf)
{
struct psp_device *psp;
- u64 offset;
psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
if (!psp)
return NULL;
psp->ddev = ddev;
- memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
+ psp->fw_buffer = psp_alloc_fw_buf(psp, conf->fw_buf, conf->fw_size,
+ PSP_FW_ALIGN, &psp->fw_buf_sz,
+ &psp->fw_paddr);
+ if (!psp->fw_buffer)
+ return NULL;
+
+ if (!conf->certfw_size) {
+ drm_dbg(ddev, "no cert fw");
+ goto done;
+ }
- psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
- psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
- if (!psp->fw_buffer) {
- drm_err(ddev, "no memory for fw buffer");
+ /* CERT firmware */
+ psp->certfw_buffer = psp_alloc_fw_buf(psp, conf->certfw_buf,
+ conf->certfw_size, PSP_CFW_ALIGN,
+ &psp->certfw_buf_sz,
+ &psp->certfw_paddr);
+ if (!psp->certfw_buffer) {
+ drm_err(ddev, "no memory for cert fw buffer");
return NULL;
}
- /*
- * AMD Platform Security Processor(PSP) requires host physical
- * address to load NPU firmware.
- */
- psp->fw_paddr = virt_to_phys(psp->fw_buffer);
- offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
- psp->fw_paddr += offset;
- memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
+done:
+ memcpy(&psp->conf, conf, sizeof(psp->conf));
return psp;
}
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index f6e20f4858db..fb2bd60b8f00 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -16,6 +16,15 @@
/* PCIe BAR Index for NPU3 */
#define NPU3_REG_BAR_INDEX 0
+#define NPU3_PSP_BAR_INDEX 4
+
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define NPU3_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+
+#define MPASP_C2PMSG_123_ALT_1 0x3810AEC
+#define MPASP_C2PMSG_156_ALT_1 0x3810B70
+#define MPASP_C2PMSG_157_ALT_1 0x3810B74
+#define MPASP_C2PMSG_73_ALT_1 0x3810A24
static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
{ .major = 5, .min_minor = 10 },
@@ -23,14 +32,28 @@ static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
};
static const struct amdxdna_dev_priv npu3_dev_priv = {
+ .npufw_path = "npu.dev.sbin",
+ .certfw_path = "cert.dev.sbin",
.mbox_bar = NPU3_MBOX_BAR,
.mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
.mbox_info_off = NPU3_MBOX_INFO_OFF,
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU3_PSP, MPASP_C2PMSG_157_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU3_PSP, MPASP_C2PMSG_73_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
+ /* npu3 doesn't use 8th pwaitmode register */
+ },
+
There is spurious whitespace here (the blank line added just above), and it ping-pongs, i.e. gets changed back and forth, in the later patches of this series.
};
const struct amdxdna_dev_info dev_npu3_pf_info = {
.mbox_bar = NPU3_MBOX_BAR,
.sram_bar = NPU3_MBOX_BUFFER_BAR,
+ .psp_bar = NPU3_PSP_BAR_INDEX,
.vbnv = "RyzenAI-npu3-pf",
.device_type = AMDXDNA_DEV_TYPE_PF,
.dev_priv = &npu3_dev_priv,