Re: [PATCH V1 4/6] accel/amdxdna: Add command doorbell and wait support
From: Mario Limonciello
Date: Wed May 06 2026 - 12:34:02 EST
On 5/6/26 11:11, Lizhi Hou wrote:
On 5/5/26 13:31, Mario Limonciello wrote:
On 5/5/26 11:09, Lizhi Hou wrote:
From: David Zhang <yidong.zhang@xxxxxxx>
Reviewed-by: Mario Limonciello (AMD) <superm1@xxxxxxxxxx>
Expose the command doorbell register to userspace on a per-hardware
context basis, enabling applications to notify the firmware of pending
commands via doorbell writes.
Introduce DRM_IOCTL_AMDXDNA_WAIT_CMD to allow userspace to wait for
completion of individual commands.
Co-developed-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
Signed-off-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
Signed-off-by: David Zhang <yidong.zhang@xxxxxxx>
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
Minor suggestion below.
---
drivers/accel/amdxdna/aie4_ctx.c | 75 +++++++++++++++++++++++++
drivers/accel/amdxdna/aie4_host_queue.h | 2 +
drivers/accel/amdxdna/aie4_pci.c | 34 +++++++++++
drivers/accel/amdxdna/aie4_pci.h | 3 +
drivers/accel/amdxdna/amdxdna_ctx.c | 34 +++++++++++
drivers/accel/amdxdna/amdxdna_ctx.h | 4 +-
drivers/accel/amdxdna/amdxdna_gem.c | 5 +-
drivers/accel/amdxdna/amdxdna_pci_drv.c | 18 +++++-
drivers/accel/amdxdna/amdxdna_pci_drv.h | 3 +
drivers/accel/amdxdna/npu3_regs.c | 5 ++
include/uapi/drm/amdxdna_accel.h | 22 +++++++-
11 files changed, 198 insertions(+), 7 deletions(-)
diff --git a/drivers/accel/amdxdna/aie4_ctx.c b/drivers/accel/amdxdna/aie4_ctx.c
index 84ac706d0ffb..8408b0d2696f 100644
--- a/drivers/accel/amdxdna/aie4_ctx.c
+++ b/drivers/accel/amdxdna/aie4_ctx.c
@@ -256,3 +256,78 @@ void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx)
aie4_hwctx_umq_fini(hwctx);
kfree(hwctx->priv);
}
+
+static inline bool valid_queue_index(u64 read, u64 write, u32 capacity)
+{
+ return (write >= read) && ((write - read) <= capacity);
+}
+
+static u64 get_read_index(struct amdxdna_hwctx *hwctx)
+{
+ u64 wi = READ_ONCE(*hwctx->priv->umq_write_index);
+ u64 ri = READ_ONCE(*hwctx->priv->umq_read_index);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+
+ /*
+ * CERT cannot update read index as uint64 atomically. Driver may read
+ * half-updated read index when it has bits in high 32bit. In case read
+ * index is not valid, wait for some time and retry once. It should
+ * allow CERT to complete the read index update.
+ */
+ if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+ XDNA_WARN(xdna, "Invalid index, ri %llu, wi %llu", ri, wi);
+ usleep_range(100, 200);
+ ri = READ_ONCE(*hwctx->priv->umq_read_index);
+ if (!valid_queue_index(ri, wi, CTX_MAX_CMDS)) {
+ XDNA_ERR(xdna, "Invalid index after retry, ri %llu, wi %llu", ri, wi);
+ ri = 0;
+ }
+ }
+
+ return ri;
+}
+
+static inline bool check_cmd_done(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ u64 read_idx = get_read_index(hwctx);
+
+ return read_idx > seq;
+}
+
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout)
+{
+ unsigned long wait_jifs = MAX_SCHEDULE_TIMEOUT;
+ struct amdxdna_hwctx_priv *priv = hwctx->priv;
+ struct cert_comp *cert_comp = priv->cert_comp;
+ long ret;
Not sure I see the point in making ret a long. wait_event_interruptible_timeout() returns 0 or 1.
Other than 0 or 1, wait_event_interruptible_timeout() can also return the remaining jiffies and -ERESTARTSYS
Ah thanks.
Lizhi
bool val;
val = wait_event_interruptible_timeout()
return val ? 0 : -ETIME;
+
+ if (timeout)
+ wait_jifs = msecs_to_jiffies(timeout);
+
+ ret = wait_event_interruptible_timeout(cert_comp->waitq,
+ (check_cmd_done(hwctx, seq)),
+ wait_jifs);
+
+ if (!ret)
+ ret = -ETIME;
+
+ return ret <= 0 ? ret : 0;
+}
+
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff)
+{
+ struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
+ int idx;
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+ if (vm_pgoff == (hwctx->doorbell_offset >> PAGE_SHIFT)) {
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ return 1;
+ }
+ }
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie4_host_queue.h b/drivers/accel/amdxdna/aie4_host_queue.h
index eb6a38dfb53e..1b33eda3f727 100644
--- a/drivers/accel/amdxdna/aie4_host_queue.h
+++ b/drivers/accel/amdxdna/aie4_host_queue.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
+#define CTX_MAX_CMDS 32
+
struct host_queue_header {
__u64 read_index;
struct {
diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
index 3be9066b7178..9ff34ce57fcb 100644
--- a/drivers/accel/amdxdna/aie4_pci.c
+++ b/drivers/accel/amdxdna/aie4_pci.c
@@ -503,6 +503,38 @@ static int aie4m_pcidev_init(struct amdxdna_dev *xdna)
return 0;
}
+static int aie4_doorbell_mmap(struct amdxdna_client *client, struct vm_area_struct *vma)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
+ const struct amdxdna_dev_priv *npriv = xdna->dev_info->dev_priv;
+ phys_addr_t res_start;
+ unsigned long pfn;
+ int ret;
+
+ if (!aie4_hwctx_valid_doorbell(client, vma->vm_pgoff)) {
+ XDNA_ERR(xdna, "Invalid doorbell page offset 0x%lx", vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ if (vma_pages(vma) != 1) {
+ XDNA_ERR(xdna, "can only map one page, got %ld", vma_pages(vma));
+ return -EINVAL;
+ }
+
+ res_start = pci_resource_start(pdev, xdna->dev_info->doorbell_bar) + npriv->doorbell_off;
+ pfn = PHYS_PFN(res_start) + vma->vm_pgoff;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ pfn,
+ PAGE_SIZE,
+ vma->vm_page_prot);
+
+ XDNA_DBG(xdna, "doorbell ret %d", ret);
+ return ret;
+}
+
static int aie4_pf_init(struct amdxdna_dev *xdna)
{
int ret;
@@ -547,4 +579,6 @@ const struct amdxdna_dev_ops aie4_vf_ops = {
.fini = aie4_vf_fini,
.hwctx_init = aie4_hwctx_init,
.hwctx_fini = aie4_hwctx_fini,
+ .mmap = aie4_doorbell_mmap,
+ .cmd_wait = aie4_cmd_wait,
};
diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
index 6103007e6d2f..b69489acd53d 100644
--- a/drivers/accel/amdxdna/aie4_pci.h
+++ b/drivers/accel/amdxdna/aie4_pci.h
@@ -36,6 +36,7 @@ struct amdxdna_dev_priv {
u32 mbox_bar;
u32 mbox_rbuf_bar;
u64 mbox_info_off;
+ u32 doorbell_off;
struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
struct aie_bar_off_pair smu_regs_off[SMU_MAX_REGS];
@@ -60,6 +61,8 @@ int aie4_suspend_fw(struct amdxdna_dev_hdl *ndev);
/* aie4_ctx.c */
int aie4_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie4_hwctx_fini(struct amdxdna_hwctx *hwctx);
+int aie4_cmd_wait(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
+int aie4_hwctx_valid_doorbell(struct amdxdna_client *client, u32 vm_pgoff);
/* aie4_sriov.c */
#if IS_ENABLED(CONFIG_PCI_IOV)
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index b5ad60d4b734..b79229a63af3 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -627,3 +627,37 @@ int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_
XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
return -EINVAL;
}
+
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_wait_cmd *args = data;
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ XDNA_DBG(xdna, "PID %d ctx %d timeout set %d ms for cmd %llu",
+ client->pid, args->hwctx, args->timeout, args->seq);
+
+ if (!xdna->dev_info->ops->cmd_wait)
+ return -EOPNOTSUPP;
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = xa_load(&client->hwctx_xa, args->hwctx);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get ctx %d", client->pid, args->hwctx);
+ ret = -EINVAL;
+ goto unlock_ctx_srcu;
+ }
+
+ ret = xdna->dev_info->ops->cmd_wait(hwctx, args->seq, args->timeout);
+
+ XDNA_DBG(xdna, "PID %d ctx %d cmd %lld wait finished, ret %d",
+ client->pid, args->hwctx, args->seq, ret);
+
+ trace_amdxdna_debug_point(current->comm, args->seq, "job returned to user");
+
+unlock_ctx_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ return ret;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index c5622718b4d5..6e3c6371a088 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -211,12 +211,10 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
u32 *arg_bo_hdls, u32 arg_bo_cnt,
u32 hwctx_hdl, u64 *seq);
-int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
- u64 seq, u32 timeout);
-
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_wait_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
#endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index ebfc472aa9e7..319d2064fafa 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -212,7 +212,8 @@ static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(&mapp->notifier, cur_seq);
up_write(&xdna->notifier_lock);
- xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
+ if (xdna->dev_info->ops->hmm_invalidate)
+ xdna->dev_info->ops->hmm_invalidate(abo, cur_seq);
if (range->event == MMU_NOTIFY_UNMAP) {
down_write(&xdna->notifier_lock);
@@ -295,7 +296,7 @@ static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo,
u32 nr_pages;
int ret;
- if (!xdna->dev_info->ops->hmm_invalidate)
+ if (!amdxdna_pasid_on(abo->client))
return 0;
mapp = kzalloc_obj(*mapp);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 39ad081ac082..c0d00db25cde 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -224,6 +224,21 @@ static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struc
return ret;
}
+static int amdxdna_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_file *drm_filp = filp->private_data;
+ struct amdxdna_client *client = drm_filp->driver_priv;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (likely(vma->vm_pgoff >= DRM_FILE_PAGE_OFFSET_START))
+ return drm_gem_mmap(filp, vma);
+
+ if (!xdna->dev_info->ops->mmap)
+ return -EOPNOTSUPP;
+
+ return xdna->dev_info->ops->mmap(client, vma);
+}
+
static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
/* Context */
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -235,6 +250,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
/* Execution */
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_WAIT_CMD, amdxdna_drm_wait_cmd_ioctl, 0),
/* AIE hardware */
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
@@ -281,7 +297,7 @@ static const struct file_operations amdxdna_fops = {
.poll = drm_poll,
.read = drm_read,
.llseek = noop_llseek,
- .mmap = drm_gem_mmap,
+ .mmap = amdxdna_drm_gem_mmap,
.show_fdinfo = drm_show_fdinfo,
.fop_flags = FOP_UNSIGNED_OFFSET,
};
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index caed11c09e55..471b72299aee 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -56,12 +56,14 @@ struct amdxdna_dev_ops {
int (*resume)(struct amdxdna_dev *xdna);
int (*suspend)(struct amdxdna_dev *xdna);
int (*sriov_configure)(struct amdxdna_dev *xdna, int num_vfs);
+ int (*mmap)(struct amdxdna_client *client, struct vm_area_struct *vma);
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
int (*hwctx_sync_debug_bo)(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+ int (*cmd_wait)(struct amdxdna_hwctx *hwctx, u64 seq, u32 timeout);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
@@ -85,6 +87,7 @@ struct amdxdna_dev_info {
int sram_bar;
int psp_bar;
int smu_bar;
+ int doorbell_bar;
int device_type;
int first_col;
u32 dev_mem_buf_shift;
diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
index 6d5da779232b..d76b2e99c308 100644
--- a/drivers/accel/amdxdna/npu3_regs.c
+++ b/drivers/accel/amdxdna/npu3_regs.c
@@ -14,6 +14,9 @@
#define NPU3_MBOX_BUFFER_BAR 2
#define NPU3_MBOX_INFO_OFF 0x0
+#define NPU3_DOORBELL_BAR 2
+#define NPU3_DOORBELL_OFF 0x0
+
/* PCIe BAR Index for NPU3 */
#define NPU3_REG_BAR_INDEX 0
#define NPU3_PSP_BAR_INDEX 4
@@ -45,6 +48,7 @@ static const struct amdxdna_dev_priv npu3_dev_priv = {
.mbox_bar = NPU3_MBOX_BAR,
.mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
.mbox_info_off = NPU3_MBOX_INFO_OFF,
+ .doorbell_off = NPU3_DOORBELL_OFF,
.psp_regs_off = {
DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
@@ -87,6 +91,7 @@ const struct amdxdna_dev_info dev_npu3_pf_info = {
const struct amdxdna_dev_info dev_npu3_vf_info = {
.mbox_bar = NPU3_MBOX_BAR,
.sram_bar = NPU3_MBOX_BUFFER_BAR,
+ .doorbell_bar = NPU3_DOORBELL_BAR,
.default_vbnv = "RyzenAI-npu3-vf",
.device_type = AMDXDNA_DEV_TYPE_UMQ,
.dev_priv = &npu3_dev_vf_priv,
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index ad9b33dd7b13..51a507561df6 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -45,7 +45,8 @@ enum amdxdna_drm_ioctl_id {
DRM_AMDXDNA_EXEC_CMD,
DRM_AMDXDNA_GET_INFO,
DRM_AMDXDNA_SET_STATE,
- DRM_AMDXDNA_GET_ARRAY = 10,
+ DRM_AMDXDNA_WAIT_CMD,
+ DRM_AMDXDNA_GET_ARRAY,
};
/**
@@ -274,6 +275,21 @@ struct amdxdna_drm_exec_cmd {
__u64 seq;
};
+/**
+ * struct amdxdna_drm_wait_cmd - Wait execution command.
+ *
+ * @hwctx: Context handle.
+ * @timeout: timeout in ms, 0 implies infinite wait.
+ * @seq: sequence number of the command returned by execute command.
+ *
+ * Wait a command specified by seq to be completed.
+ */
+struct amdxdna_drm_wait_cmd {
+ __u32 hwctx;
+ __u32 timeout;
+ __u64 seq;
+};
+
/**
* struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware
* @buffer: The user space buffer that will return the AIE status.
@@ -739,6 +755,10 @@ struct amdxdna_drm_set_power_mode {
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \
struct amdxdna_drm_get_array)
+#define DRM_IOCTL_AMDXDNA_WAIT_CMD \
+ DRM_IOW(DRM_COMMAND_BASE + DRM_AMDXDNA_WAIT_CMD, \
+ struct amdxdna_drm_wait_cmd)
+
#if defined(__cplusplus)
} /* extern c end */
#endif