[PATCH V2 3/4] drivers/fpga/amd: Add remote queue

From: Yidong Zhang
Date: Tue Dec 10 2024 - 13:38:52 EST


The remote queue is a hardware queue based ring buffer service between mgmt
PF and PCIe hardware firmware for programming FPGA Program Logic, loading
versal firmware and checking card healthy status.

Co-developed-by: Nishad Saraf <nishads@xxxxxxx>
Signed-off-by: Nishad Saraf <nishads@xxxxxxx>
Co-developed-by: Prapul Krishnamurthy <prapulk@xxxxxxx>
Signed-off-by: Prapul Krishnamurthy <prapulk@xxxxxxx>
Signed-off-by: Yidong Zhang <yidong.zhang@xxxxxxx>
---
drivers/fpga/amd/Makefile | 3 +-
drivers/fpga/amd/versal-pci-rm-queue.c | 326 +++++++++++++++++++++++
drivers/fpga/amd/versal-pci-rm-queue.h | 21 ++
drivers/fpga/amd/versal-pci-rm-service.h | 209 +++++++++++++++
4 files changed, 558 insertions(+), 1 deletion(-)
create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.c
create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.h
create mode 100644 drivers/fpga/amd/versal-pci-rm-service.h

diff --git a/drivers/fpga/amd/Makefile b/drivers/fpga/amd/Makefile
index 7a604785e5f9..b2ffdbf23400 100644
--- a/drivers/fpga/amd/Makefile
+++ b/drivers/fpga/amd/Makefile
@@ -3,4 +3,5 @@
obj-$(CONFIG_AMD_VERSAL_PCI) += versal-pci.o

versal-pci-$(CONFIG_AMD_VERSAL_PCI) := versal-pci-main.o \
- versal-pci-comm-chan.o
+ versal-pci-comm-chan.o \
+ versal-pci-rm-queue.o
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.c b/drivers/fpga/amd/versal-pci-rm-queue.c
new file mode 100644
index 000000000000..92f2e1165052
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/pci.h>
+
+#include "versal-pci.h"
+#include "versal-pci-rm-queue.h"
+#include "versal-pci-rm-service.h"
+
+static inline struct rm_device *to_rdev_msg_monitor(struct work_struct *w)
+{
+ return container_of(w, struct rm_device, msg_monitor);
+}
+
+static inline struct rm_device *to_rdev_msg_timer(struct timer_list *t)
+{
+ return container_of(t, struct rm_device, msg_timer);
+}
+
+static inline u32 rm_io_read(struct rm_device *rdev, u32 offset)
+{
+ /* TODO */
+ return 0;
+}
+
+static inline int rm_io_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+ /* TODO */
+ return 0;
+}
+
+static inline u32 rm_queue_read(struct rm_device *rdev, u32 offset)
+{
+ /* TODO */
+ return 0;
+}
+
+static inline void rm_queue_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+ /* TODO */
+}
+
+static inline void rm_queue_bulk_read(struct rm_device *rdev, u32 offset,
+ u32 *value, u32 size)
+{
+ /* TODO */
+}
+
+static inline void rm_queue_bulk_write(struct rm_device *rdev, u32 offset,
+ u32 *value, u32 size)
+{
+ /* TODO */
+}
+
+static inline u32 rm_queue_get_cidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+ u32 off;
+
+ if (type == RM_QUEUE_SQ)
+ off = offsetof(struct rm_queue_header, sq_cidx);
+ else
+ off = offsetof(struct rm_queue_header, cq_cidx);
+
+ return rm_queue_read(rdev, off);
+}
+
+static inline void rm_queue_set_cidx(struct rm_device *rdev, enum rm_queue_type type,
+ u32 value)
+{
+ u32 off;
+
+ if (type == RM_QUEUE_SQ)
+ off = offsetof(struct rm_queue_header, sq_cidx);
+ else
+ off = offsetof(struct rm_queue_header, cq_cidx);
+
+ rm_queue_write(rdev, off, value);
+}
+
+static inline u32 rm_queue_get_pidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+ if (type == RM_QUEUE_SQ)
+ return rm_io_read(rdev, RM_IO_SQ_PIDX_OFF);
+ else
+ return rm_io_read(rdev, RM_IO_CQ_PIDX_OFF);
+}
+
+static inline int rm_queue_set_pidx(struct rm_device *rdev,
+ enum rm_queue_type type, u32 value)
+{
+ if (type == RM_QUEUE_SQ)
+ return rm_io_write(rdev, RM_IO_SQ_PIDX_OFF, value);
+ else
+ return rm_io_write(rdev, RM_IO_CQ_PIDX_OFF, value);
+}
+
+static inline u32 rm_queue_get_sq_slot_offset(struct rm_device *rdev)
+{
+ u32 index;
+
+ if ((rdev->sq.pidx - rdev->sq.cidx) >= rdev->queue_size)
+ return RM_INVALID_SLOT;
+
+ index = rdev->sq.pidx & (rdev->queue_size - 1);
+ return rdev->sq.offset + RM_CMD_SQ_SLOT_SIZE * index;
+}
+
+static inline u32 rm_queue_get_cq_slot_offset(struct rm_device *rdev)
+{
+ u32 index;
+
+ index = rdev->cq.cidx & (rdev->queue_size - 1);
+ return rdev->cq.offset + RM_CMD_CQ_SLOT_SIZE * index;
+}
+
+static int rm_queue_submit_cmd(struct rm_cmd *cmd)
+{
+ struct versal_pci_device *vdev = cmd->rdev->vdev;
+ struct rm_device *rdev = cmd->rdev;
+ u32 offset;
+ int ret;
+
+ mutex_lock(&rdev->queue);
+
+ offset = rm_queue_get_sq_slot_offset(rdev);
+ if (!offset) {
+ vdev_err(vdev, "No SQ slot available");
+ ret = -ENOSPC;
+ goto exit;
+ }
+
+ rm_queue_bulk_write(rdev, offset, (u32 *)&cmd->sq_msg,
+ sizeof(cmd->sq_msg));
+
+ ret = rm_queue_set_pidx(rdev, RM_QUEUE_SQ, ++rdev->sq.pidx);
+ if (ret) {
+ vdev_err(vdev, "Failed to update PIDX, ret %d", ret);
+ goto exit;
+ }
+
+ list_add_tail(&cmd->list, &rdev->submitted_cmds);
+exit:
+ mutex_unlock(&rdev->queue);
+ return ret;
+}
+
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd)
+{
+ mutex_lock(&cmd->rdev->queue);
+ list_del(&cmd->list);
+ mutex_unlock(&cmd->rdev->queue);
+}
+
+static int rm_queue_wait_cmd_timeout(struct rm_cmd *cmd, unsigned long timeout)
+{
+ struct versal_pci_device *vdev = cmd->rdev->vdev;
+ int ret;
+
+ if (wait_for_completion_timeout(&cmd->executed, timeout)) {
+ ret = cmd->cq_msg.data.rcode;
+ if (!ret)
+ return 0;
+
+ vdev_err(vdev, "CMD returned with a failure: %d", ret);
+ return ret;
+ }
+
+ /*
+ * each cmds will be cleaned up by complete before it times out.
+ * if we reach here, the cmd should be cleared and hot reset should
+ * be issued.
+ */
+ vdev_err(vdev, "cmd is timedout after, please reset the card");
+ rm_queue_withdraw_cmd(cmd);
+ return -ETIME;
+}
+
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout)
+{
+ int ret;
+
+ ret = rm_queue_submit_cmd(cmd);
+ if (ret)
+ return ret;
+
+ return rm_queue_wait_cmd_timeout(cmd, timeout);
+}
+
+static int rm_process_msg(struct rm_device *rdev)
+{
+ struct versal_pci_device *vdev = rdev->vdev;
+ struct rm_cmd *cmd, *next;
+ struct rm_cmd_cq_hdr header;
+ u32 offset;
+
+ offset = rm_queue_get_cq_slot_offset(rdev);
+ if (!offset) {
+ vdev_err(vdev, "Invalid CQ offset");
+ return -EINVAL;
+ }
+
+ rm_queue_bulk_read(rdev, offset, (u32 *)&header, sizeof(header));
+
+ list_for_each_entry_safe(cmd, next, &rdev->submitted_cmds, list) {
+ u32 value = 0;
+
+ if (cmd->sq_msg.hdr.id != header.id)
+ continue;
+
+ rm_queue_bulk_read(rdev, offset + sizeof(cmd->cq_msg.hdr),
+ (u32 *)&cmd->cq_msg.data,
+ sizeof(cmd->cq_msg.data));
+
+ rm_queue_write(rdev, offset, value);
+
+ list_del(&cmd->list);
+ complete(&cmd->executed);
+ return 0;
+ }
+
+ vdev_err(vdev, "Unknown cmd ID %d found in CQ", header.id);
+ return -EFAULT;
+}
+
+static void rm_check_msg(struct work_struct *w)
+{
+ struct rm_device *rdev = to_rdev_msg_monitor(w);
+ int ret;
+
+ mutex_lock(&rdev->queue);
+
+ rdev->sq.cidx = rm_queue_get_cidx(rdev, RM_QUEUE_SQ);
+ rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+ while (rdev->cq.cidx < rdev->cq.pidx) {
+ ret = rm_process_msg(rdev);
+ if (ret)
+ break;
+
+ rdev->cq.cidx++;
+
+ rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+ }
+
+ mutex_unlock(&rdev->queue);
+}
+
+static void rm_sched_work(struct timer_list *t)
+{
+ struct rm_device *rdev = to_rdev_msg_timer(t);
+
+ /* Schedule a work in the general workqueue */
+ schedule_work(&rdev->msg_monitor);
+ /* Periodic timer */
+ mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+}
+
+void rm_queue_fini(struct rm_device *rdev)
+{
+ del_timer_sync(&rdev->msg_timer);
+ cancel_work_sync(&rdev->msg_monitor);
+ mutex_destroy(&rdev->queue);
+}
+
+int rm_queue_init(struct rm_device *rdev)
+{
+ struct versal_pci_device *vdev = rdev->vdev;
+ struct rm_queue_header header = {0};
+ int ret;
+
+ INIT_LIST_HEAD(&rdev->submitted_cmds);
+ mutex_init(&rdev->queue);
+
+ rm_queue_bulk_read(rdev, RM_HDR_OFF, (u32 *)&header, sizeof(header));
+
+ if (header.magic != RM_QUEUE_HDR_MAGIC_NUM) {
+ vdev_err(vdev, "Invalid RM queue header");
+ ret = -ENODEV;
+ goto error;
+ }
+
+ if (!header.version) {
+ vdev_err(vdev, "Invalid RM queue header");
+ ret = -ENODEV;
+ goto error;
+ }
+
+ sema_init(&rdev->sq.data_lock, 1);
+ sema_init(&rdev->cq.data_lock, 1);
+ rdev->queue_size = header.size;
+ rdev->sq.offset = header.sq_off;
+ rdev->cq.offset = header.cq_off;
+ rdev->sq.type = RM_QUEUE_SQ;
+ rdev->cq.type = RM_QUEUE_CQ;
+ rdev->sq.data_size = rdev->queue_buffer_size - RM_CMD_CQ_BUFFER_SIZE;
+ rdev->cq.data_size = RM_CMD_CQ_BUFFER_SIZE;
+ rdev->sq.data_offset = rdev->queue_buffer_start +
+ RM_CMD_CQ_BUFFER_OFFSET + RM_CMD_CQ_BUFFER_SIZE;
+ rdev->cq.data_offset = rdev->queue_buffer_start +
+ RM_CMD_CQ_BUFFER_OFFSET;
+ rdev->sq.cidx = header.sq_cidx;
+ rdev->cq.cidx = header.cq_cidx;
+
+ rdev->sq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_SQ);
+ rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+ if (rdev->cq.cidx != rdev->cq.pidx) {
+ vdev_warn(vdev, "Clearing stale completions");
+ rdev->cq.cidx = rdev->cq.pidx;
+ rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+ }
+
+ /* Create and schedule timer to do recurring work */
+ INIT_WORK(&rdev->msg_monitor, &rm_check_msg);
+ timer_setup(&rdev->msg_timer, &rm_sched_work, 0);
+ mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+
+ return 0;
+error:
+ mutex_destroy(&rdev->queue);
+ return ret;
+}
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.h b/drivers/fpga/amd/versal-pci-rm-queue.h
new file mode 100644
index 000000000000..3bc7102c6a58
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_QUEUE_H
+#define __RM_QUEUE_H
+
+struct rm_device;
+
+/* rm queue hardware setup */
+int rm_queue_init(struct rm_device *rdev);
+void rm_queue_fini(struct rm_device *rdev);
+
+/* rm queue common API */
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout);
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd);
+
+#endif /* __RM_QUEUE_H */
diff --git a/drivers/fpga/amd/versal-pci-rm-service.h b/drivers/fpga/amd/versal-pci-rm-service.h
new file mode 100644
index 000000000000..85a78257770a
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-service.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_SERVICE_H
+#define __RM_SERVICE_H
+
+#define RM_HDR_OFF 0x0
+#define RM_HDR_MAGIC_NUM 0x564D5230
+#define RM_QUEUE_HDR_MAGIC_NUM 0x5847513F
+#define RM_PCI_IO_BAR_OFF 0x2010000
+#define RM_PCI_IO_SIZE SZ_4K
+#define RM_PCI_SHMEM_BAR_OFF 0x8000000
+#define RM_PCI_SHMEM_SIZE SZ_128M
+#define RM_PCI_SHMEM_HDR_SIZE 0x28
+
+#define RM_QUEUE_HDR_MAGIC_NUM_OFF 0x0
+#define RM_IO_SQ_PIDX_OFF 0x0
+#define RM_IO_CQ_PIDX_OFF 0x100
+
+#define RM_CMD_ID_MIN 1
+#define RM_CMD_ID_MAX (BIT(17) - 1)
+#define RM_CMD_SQ_HDR_OPS_MSK GENMASK(15, 0)
+#define RM_CMD_SQ_HDR_SIZE_MSK GENMASK(14, 0)
+#define RM_CMD_SQ_SLOT_SIZE SZ_512
+#define RM_CMD_CQ_SLOT_SIZE SZ_16
+#define RM_CMD_CQ_BUFFER_SIZE (1024 * 1024)
+#define RM_CMD_CQ_BUFFER_OFFSET 0x0
+#define RM_CMD_LOG_PAGE_TYPE_MASK GENMASK(15, 0)
+#define RM_CMD_VMR_CONTROL_MSK GENMASK(10, 8)
+#define RM_CMD_VMR_CONTROL_PS_MASK BIT(9)
+
+#define RM_CMD_WAIT_CONFIG_TIMEOUT msecs_to_jiffies(10 * 1000)
+#define RM_CMD_WAIT_DOWNLOAD_TIMEOUT msecs_to_jiffies(300 * 1000)
+
+#define RM_COMPLETION_TIMER (HZ / 10)
+#define RM_HEALTH_CHECK_TIMER (HZ)
+
+#define RM_INVALID_SLOT 0
+
+enum rm_queue_opcode {
+ RM_QUEUE_OP_LOAD_XCLBIN = 0x0,
+ RM_QUEUE_OP_GET_LOG_PAGE = 0x8,
+ RM_QUEUE_OP_LOAD_FW = 0xA,
+ RM_QUEUE_OP_LOAD_APU_FW = 0xD,
+ RM_QUEUE_OP_VMR_CONTROL = 0xE,
+ RM_QUEUE_OP_IDENTIFY = 0x202,
+};
+
+struct rm_cmd_sq_hdr {
+ __u16 opcode;
+ __u16 msg_size;
+ __u16 id;
+ __u16 reserved;
+} __packed;
+
+struct rm_cmd_cq_hdr {
+ __u16 id;
+ __u16 reserved;
+} __packed;
+
+struct rm_cmd_sq_bin {
+ __u64 address;
+ __u32 size;
+ __u32 reserved1;
+ __u32 reserved2;
+ __u32 reserved3;
+ __u64 reserved4;
+} __packed;
+
+struct rm_cmd_sq_log_page {
+ __u64 address;
+ __u32 size;
+ __u32 reserved1;
+ __u32 type;
+ __u32 reserved2;
+} __packed;
+
+struct rm_cmd_sq_ctrl {
+ __u32 status;
+} __packed;
+
+struct rm_cmd_sq_data {
+ union {
+ struct rm_cmd_sq_log_page page;
+ struct rm_cmd_sq_bin bin;
+ struct rm_cmd_sq_ctrl ctrl;
+ };
+} __packed;
+
+struct rm_cmd_cq_identify {
+ __u16 major;
+ __u16 minor;
+ __u32 reserved;
+} __packed;
+
+struct rm_cmd_cq_log_page {
+ __u32 len;
+ __u32 reserved;
+} __packed;
+
+struct rm_cmd_cq_control {
+ __u16 status;
+ __u16 reserved1;
+ __u32 reserved2;
+} __packed;
+
+struct rm_cmd_cq_data {
+ union {
+ struct rm_cmd_cq_identify identify;
+ struct rm_cmd_cq_log_page page;
+ struct rm_cmd_cq_control ctrl;
+ __u32 reserved[2];
+ };
+ __u32 rcode;
+} __packed;
+
+struct rm_cmd_sq_msg {
+ struct rm_cmd_sq_hdr hdr;
+ struct rm_cmd_sq_data data;
+} __packed;
+
+struct rm_cmd_cq_msg {
+ struct rm_cmd_cq_hdr hdr;
+ struct rm_cmd_cq_data data;
+} __packed;
+
+struct rm_cmd {
+ struct rm_device *rdev;
+ struct list_head list;
+ struct completion executed;
+ struct rm_cmd_sq_msg sq_msg;
+ struct rm_cmd_cq_msg cq_msg;
+ enum rm_queue_opcode opcode;
+ __u8 *buffer;
+ ssize_t size;
+};
+
+enum rm_queue_type {
+ RM_QUEUE_SQ,
+ RM_QUEUE_CQ
+};
+
+enum rm_cmd_log_page_type {
+ RM_CMD_LOG_PAGE_AXI_TRIP_STATUS = 0x0,
+ RM_CMD_LOG_PAGE_FW_ID = 0xA,
+};
+
+struct rm_queue {
+ enum rm_queue_type type;
+ __u32 pidx;
+ __u32 cidx;
+ __u32 offset;
+ __u32 data_offset;
+ __u32 data_size;
+ struct semaphore data_lock;
+};
+
+struct rm_queue_header {
+ __u32 magic;
+ __u32 version;
+ __u32 size;
+ __u32 sq_off;
+ __u32 sq_slot_size;
+ __u32 cq_off;
+ __u32 sq_cidx;
+ __u32 cq_cidx;
+};
+
+struct rm_header {
+ __u32 magic;
+ __u32 queue_base;
+ __u32 queue_size;
+ __u32 status_off;
+ __u32 status_len;
+ __u32 log_index;
+ __u32 log_off;
+ __u32 log_size;
+ __u32 data_start;
+ __u32 data_end;
+};
+
+struct rm_device {
+ struct versal_pci_device *vdev;
+
+ struct rm_header rm_metadata;
+ __u32 queue_buffer_start;
+ __u32 queue_buffer_size;
+ __u32 queue_base;
+
+ /* Lock to queue access */
+ struct mutex queue;
+ struct rm_queue sq;
+ struct rm_queue cq;
+ __u32 queue_size;
+
+ struct timer_list msg_timer;
+ struct work_struct msg_monitor;
+ struct timer_list health_timer;
+ struct work_struct health_monitor;
+ struct list_head submitted_cmds;
+
+ __u32 firewall_tripped;
+};
+
+#endif /* __RM_SERVICE_H */
--
2.34.1