[PATCH net-next v7 04/10] enic: add admin CQ service with MSI-X interrupt and NAPI polling

From: Satish Kharat

Date: Wed May 13 2026 - 11:33:28 EST


Add completion queue service for the admin channel WQ and RQ, driven
by an MSI-X interrupt and NAPI polling.

The receive pipeline is: MSI-X ISR -> NAPI poll -> RQ CQ service ->
message enqueue -> workqueue handler -> admin_rq_handler callback.
NAPI drains the RQ CQ in softirq context, copying each received
buffer into an enic_admin_msg and appending it to a spinlock-protected
list. A system workqueue handler then processes each message in
process context where sleeping (mutex, GFP_KERNEL allocations) is
safe.

The WQ CQ service counts transmit completions and is called from the
synchronous MBOX send path. Interrupt generation is disabled on the
WQ CQ (admin_cq[0]) because it is polled synchronously; only the RQ
CQ (admin_cq[1]) is interrupt-driven via NAPI.

The NAPI poll path refills admin RQ buffers with GFP_ATOMIC. Log a
rate-limited warning when admin RQ buffer refill fails in NAPI context.

The admin channel open/close paths set up and tear down the MSI-X
interrupt, NAPI instance, and workqueue. CQ init enables interrupt
delivery on the RQ CQ and sets the interrupt offset so completions
trigger the admin ISR.

The admin interrupt is allocated from the RES_TYPE_INTR_CTRL pool
at index == intr_count, the first slot beyond the data-path IRQs.

Signed-off-by: Satish Kharat <satishkh@xxxxxxxxx>
---
drivers/net/ethernet/cisco/enic/enic.h | 8 +
drivers/net/ethernet/cisco/enic/enic_admin.c | 303 ++++++++++++++++++++++++++-
drivers/net/ethernet/cisco/enic/enic_admin.h | 12 ++
3 files changed, 319 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 08472420f3a1..1c09da3c0b1a 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -296,6 +296,14 @@ struct enic {
struct vnic_rq admin_rq;
struct vnic_cq admin_cq[2];
struct vnic_intr admin_intr;
+ struct napi_struct admin_napi;
+ unsigned int admin_intr_index;
+ struct work_struct admin_msg_work;
+ spinlock_t admin_msg_lock; /* protects admin_msg_list */
+ struct list_head admin_msg_list;
+ u64 admin_msg_drop_cnt;
+ void (*admin_rq_handler)(struct enic *enic, void *buf,
+ unsigned int len);
};

static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev)
diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c
index b28fc6c656cc..9ebc69c7dda6 100644
--- a/drivers/net/ethernet/cisco/enic/enic_admin.c
+++ b/drivers/net/ethernet/cisco/enic/enic_admin.c
@@ -4,6 +4,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>

#include "vnic_dev.h"
#include "vnic_wq.h"
@@ -15,6 +16,7 @@
#include "enic.h"
#include "enic_admin.h"
#include "cq_desc.h"
+#include "cq_enet_desc.h"
#include "wq_enet_desc.h"
#include "rq_enet_desc.h"

@@ -94,6 +96,248 @@ static void enic_admin_rq_drain(struct enic *enic)
vnic_rq_clean(&enic->admin_rq, enic_admin_rq_buf_clean);
}

+/* Return the color field of a completion descriptor.
+ *
+ * The type/color byte is read from the last byte of the descriptor
+ * (offset desc_size - 1), shifted by CQ_DESC_COLOR_SHIFT and masked
+ * with CQ_DESC_COLOR_MASK.
+ */
+static unsigned int enic_admin_cq_color(void *cq_desc, unsigned int desc_size)
+{
+	const u8 *last_byte = (const u8 *)cq_desc + desc_size - 1;
+
+	return (*last_byte >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK;
+}
+
+/* Consume every pending completion on the admin WQ CQ (admin_cq[0]).
+ *
+ * Entries are consumed for as long as their color differs from
+ * cq->last_color. Nothing is decoded from the descriptors; they are
+ * only counted.
+ *
+ * Return: number of completions consumed.
+ */
+unsigned int enic_admin_wq_cq_service(struct enic *enic)
+{
+	struct vnic_cq *wq_cq = &enic->admin_cq[0];
+	unsigned int completed = 0;
+	void *cqe;
+
+	for (;;) {
+		cqe = vnic_cq_to_clean(wq_cq);
+		if (enic_admin_cq_color(cqe, wq_cq->ring.desc_size) ==
+		    wq_cq->last_color)
+			break;
+
+		/* Ensure color bit is read before descriptor fields */
+		rmb();
+		vnic_cq_inc_to_clean(wq_cq);
+		completed++;
+	}
+
+	return completed;
+}
+
+/* Copy a received admin buffer into a freshly allocated enic_admin_msg
+ * and append it to enic->admin_msg_list under admin_msg_lock.
+ *
+ * The allocation uses GFP_ATOMIC. If it fails the message is dropped
+ * and admin_msg_drop_cnt is incremented.
+ */
+static void enic_admin_msg_enqueue(struct enic *enic, void *buf,
+				   unsigned int len)
+{
+	struct enic_admin_msg *entry;
+
+	entry = kmalloc(struct_size(entry, data, len), GFP_ATOMIC);
+	if (entry) {
+		entry->len = len;
+		memcpy(entry->data, buf, len);
+
+		spin_lock(&enic->admin_msg_lock);
+		list_add_tail(&entry->list, &enic->admin_msg_list);
+		spin_unlock(&enic->admin_msg_lock);
+	} else {
+		enic->admin_msg_drop_cnt++;
+	}
+}
+
+/* Service up to @budget completions on the admin RQ CQ (admin_cq[1]).
+ *
+ * For each completion the received length is decoded from the CQ
+ * descriptor. Completions that claim more bytes than the posted buffer
+ * holds are dropped immediately; otherwise the buffer is synced for CPU
+ * access and, unless the completion is truncated or lacks FCS_OK, the
+ * payload is copied onto the deferred message list. Every consumed
+ * buffer is released, and the RQ is refilled with GFP_ATOMIC before
+ * returning.
+ *
+ * Return: number of completions consumed, or @budget when the refill
+ * reports failure (the NAPI poll routine then does not complete).
+ */
+unsigned int enic_admin_rq_cq_service(struct enic *enic, unsigned int budget)
+{
+	struct vnic_cq *cq = &enic->admin_cq[1];
+	struct vnic_rq *rq = &enic->admin_rq;
+	struct cq_enet_rq_desc *rq_desc;
+	struct vnic_rq_buf *buf;
+	u16 bwf, bytes_written;
+	unsigned int work = 0;
+	void *desc;
+
+	desc = vnic_cq_to_clean(cq);
+	while (work < budget &&
+	       enic_admin_cq_color(desc, cq->ring.desc_size) !=
+	       cq->last_color) {
+		/* Ensure CQ descriptor fields are read after
+		 * the color/valid check.
+		 */
+		rmb();
+		buf = rq->to_clean;
+
+		/* Decode the actual number of bytes hardware wrote into
+		 * the RX buffer. buf->len is the static allocation size
+		 * (ENIC_ADMIN_BUF_SIZE) and would expose uninitialised
+		 * heap memory beyond the real payload. bytes_written_flags
+		 * is at the same offset in every cq_enet_rq_desc[_32|_64]
+		 * variant.
+		 */
+		rq_desc = desc;
+		bwf = le16_to_cpu(rq_desc->bytes_written_flags);
+		bytes_written = bwf & CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+		if (bytes_written > buf->len)
+			goto next_desc;
+
+		dma_sync_single_for_cpu(&enic->pdev->dev,
+					buf->dma_addr, buf->len,
+					DMA_FROM_DEVICE);
+
+		/* Drop on hardware error indications. Admin messages
+		 * are internal to the VIC, not received over the wire.
+		 * Firmware sets TRUNCATED when the message does not fit
+		 * in the posted buffer, and FCS_OK is always set on
+		 * healthy admin completions.
+		 */
+		if (bwf & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) {
+			netdev_warn_once(enic->netdev,
+					 "admin RQ: truncated message dropped\n");
+			goto next_desc;
+		}
+		if (!(rq_desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK)) {
+			netdev_warn_once(enic->netdev,
+					 "admin RQ: bad FCS, dropping message\n");
+			goto next_desc;
+		}
+
+		enic_admin_msg_enqueue(enic, buf->os_buf, bytes_written);
+
+next_desc:
+		/* Release the consumed buffer and advance both rings */
+		enic_admin_rq_buf_clean(rq, rq->to_clean);
+		rq->to_clean = rq->to_clean->next;
+		rq->ring.desc_avail++;
+
+		vnic_cq_inc_to_clean(cq);
+		work++;
+		desc = vnic_cq_to_clean(cq);
+	}
+
+	if (enic_admin_rq_fill(enic, GFP_ATOMIC)) {
+		/* A non-zero return is treated as a refill failure: report
+		 * it (rate-limited, since this runs on every poll) and claim
+		 * the whole budget so the poll is not completed and the
+		 * refill is retried on the next pass.
+		 */
+		if (net_ratelimit())
+			netdev_warn(enic->netdev,
+				    "admin RQ: buffer refill failed in NAPI context\n");
+		work = budget;
+	}
+
+	return work;
+}
+
+/* MSI-X handler for the admin interrupt.
+ *
+ * @data is the napi_struct passed as the IRQ cookie. The handler only
+ * schedules that NAPI instance and reports the interrupt as handled.
+ */
+static irqreturn_t enic_admin_isr_msix(int irq, void *data)
+{
+	napi_schedule_irqoff(data);
+
+	return IRQ_HANDLED;
+}
+
+/* Workqueue handler that delivers queued admin messages.
+ *
+ * The pending list is moved onto a private list under admin_msg_lock,
+ * then each message is passed to enic->admin_rq_handler (when one is
+ * set) and freed. The handler pointer is sampled once, before the list
+ * is taken.
+ */
+static void enic_admin_msg_work_handler(struct work_struct *work)
+{
+	struct enic *enic = container_of(work, struct enic, admin_msg_work);
+	void (*deliver)(struct enic *, void *, unsigned int);
+	struct enic_admin_msg *cur;
+	LIST_HEAD(pending);
+
+	deliver = READ_ONCE(enic->admin_rq_handler);
+
+	/* Detach everything queued so far; the lock is not held while
+	 * the messages are processed.
+	 */
+	spin_lock_bh(&enic->admin_msg_lock);
+	list_splice_init(&enic->admin_msg_list, &pending);
+	spin_unlock_bh(&enic->admin_msg_lock);
+
+	while (!list_empty(&pending)) {
+		cur = list_first_entry(&pending, struct enic_admin_msg, list);
+		if (deliver)
+			deliver(enic, cur->data, cur->len);
+		list_del(&cur->list);
+		kfree(cur);
+	}
+}
+
+/* NAPI poll routine for the admin RQ completion queue.
+ *
+ * Interrupt credits are read first, then the RQ CQ is serviced with the
+ * given budget. Any serviced work schedules admin_msg_work. If less than
+ * the budget was used and NAPI completes, the credits (at least one) are
+ * returned with the unmask flag set; otherwise any outstanding credits
+ * are returned without unmasking.
+ *
+ * Return: the amount of work reported by enic_admin_rq_cq_service().
+ */
+static int enic_admin_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct enic *enic = container_of(napi, struct enic, admin_napi);
+	unsigned int credits = vnic_intr_credits(&enic->admin_intr);
+	unsigned int done = enic_admin_rq_cq_service(enic, budget);
+
+	if (done)
+		schedule_work(&enic->admin_msg_work);
+
+	if (done < budget && napi_complete_done(napi, done)) {
+		/* Polling finished: hand back at least one credit, unmask */
+		vnic_intr_return_credits(&enic->admin_intr,
+					 credits ? credits : 1,
+					 1 /* unmask */, 0);
+		return done;
+	}
+
+	/* Polling continues: return what accumulated, don't unmask */
+	if (credits)
+		vnic_intr_return_credits(&enic->admin_intr, credits,
+					 0 /* don't unmask */, 0);
+
+	return done;
+}
+
+/* Allocate and hook up the admin channel MSI-X interrupt and NAPI
+ * instance.
+ *
+ * The interrupt slot used is index enic->intr_count. The device must be
+ * in MSI-X mode and that index must be below enic->intr_avail.
+ *
+ * Return: 0 on success, -ENODEV when the precondition above is not met,
+ * or the error returned by vnic_intr_alloc() or request_irq().
+ */
+static int enic_admin_setup_intr(struct enic *enic)
+{
+	unsigned int idx = enic->intr_count;
+	int ret;
+
+	if (vnic_dev_get_intr_mode(enic->vdev) != VNIC_DEV_INTR_MODE_MSIX)
+		return -ENODEV;
+	if (idx >= enic->intr_avail)
+		return -ENODEV;
+
+	/* The admin INTR takes a slot in the same RES_TYPE_INTR_CTRL
+	 * strided array of per-vector control blocks (mask, coalescing
+	 * timer, credit return) that the data-path IRQs occupy in BAR0.
+	 * vnic_intr_alloc() defaults to RES_TYPE_INTR_CTRL, which is the
+	 * resource type wanted here.
+	 */
+	ret = vnic_intr_alloc(enic->vdev, &enic->admin_intr, idx);
+	if (ret) {
+		netdev_warn(enic->netdev,
+			    "Failed to alloc admin intr at index %u: %d\n",
+			    idx, ret);
+		return ret;
+	}
+
+	enic->admin_intr_index = idx;
+
+	/* Describe the vector: handler, cookie and a "<netdev>-admin" name */
+	enic->msix[idx].isr = enic_admin_isr_msix;
+	enic->msix[idx].devid = &enic->admin_napi;
+	snprintf(enic->msix[idx].devname, sizeof(enic->msix[idx].devname),
+		 "%s-admin", enic->netdev->name);
+
+	/* NAPI is added and enabled before the IRQ is requested */
+	netif_napi_add(enic->netdev, &enic->admin_napi,
+		       enic_admin_napi_poll);
+	napi_enable(&enic->admin_napi);
+
+	ret = request_irq(enic->msix_entry[idx].vector,
+			  enic->msix[idx].isr, 0,
+			  enic->msix[idx].devname,
+			  enic->msix[idx].devid);
+	if (ret)
+		goto err_undo_napi;
+
+	enic->msix[idx].requested = 1;
+
+	netdev_dbg(enic->netdev,
+		   "admin channel using MSI-X interrupt (index %u)\n",
+		   idx);
+
+	return 0;
+
+err_undo_napi:
+	netdev_warn(enic->netdev,
+		    "Failed to request admin MSI-X irq: %d\n", ret);
+	napi_disable(&enic->admin_napi);
+	netif_napi_del(&enic->admin_napi);
+	vnic_intr_free(&enic->admin_intr);
+	return ret;
+}
+
+/* Tear down the admin interrupt plumbing: disable and delete the admin
+ * NAPI instance, then release the MSI-X IRQ and clear its "requested"
+ * flag. The vnic_intr resource itself is not freed here.
+ */
+static void enic_admin_teardown_intr(struct enic *enic)
+{
+	struct napi_struct *napi = &enic->admin_napi;
+	unsigned int idx = enic->admin_intr_index;
+
+	napi_disable(napi);
+	netif_napi_del(napi);
+
+	free_irq(enic->msix_entry[idx].vector, enic->msix[idx].devid);
+	enic->msix[idx].requested = 0;
+}
+
static int enic_admin_qp_type_set(struct enic *enic, u32 enable)
{
u64 a0 = QP_TYPE_ADMIN, a1 = enable;
@@ -173,6 +417,7 @@ static int enic_admin_alloc_resources(struct enic *enic)

static void enic_admin_free_resources(struct enic *enic)
{
+ vnic_intr_free(&enic->admin_intr);
vnic_cq_free(&enic->admin_cq[1]);
vnic_cq_free(&enic->admin_cq[0]);
vnic_rq_free(&enic->admin_rq);
@@ -181,6 +426,8 @@ static void enic_admin_free_resources(struct enic *enic)

static void enic_admin_init_resources(struct enic *enic)
{
+ unsigned int intr_offset = enic->admin_intr_index;
+
vnic_wq_init(&enic->admin_wq,
0, 0, 0); /* cq_index, err_intr_enable, err_intr_offset */
vnic_rq_init(&enic->admin_rq,
@@ -189,20 +436,34 @@ static void enic_admin_init_resources(struct enic *enic)
VNIC_CQ_FC_DISABLE,
VNIC_CQ_COLOR_ENABLE,
0, 0, 1, /* cq_head, cq_tail, cq_tail_color */
- VNIC_CQ_INTR_DISABLE,
+ VNIC_CQ_INTR_DISABLE, /* polled synchronously by mbox send */
VNIC_CQ_ENTRY_ENABLE,
VNIC_CQ_MSG_DISABLE,
- 0, /* interrupt_offset */
+ intr_offset,
0 /* cq_message_addr */);
vnic_cq_init(&enic->admin_cq[1],
VNIC_CQ_FC_DISABLE,
VNIC_CQ_COLOR_ENABLE,
0, 0, 1, /* cq_head, cq_tail, cq_tail_color */
- VNIC_CQ_INTR_DISABLE,
+ VNIC_CQ_INTR_ENABLE,
VNIC_CQ_ENTRY_ENABLE,
VNIC_CQ_MSG_DISABLE,
- 0, /* interrupt_offset */
+ intr_offset,
0 /* cq_message_addr */);
+ vnic_intr_init(&enic->admin_intr,
+ 0, 0, 1); /* coalescing_timer, coalescing_type, mask_on_assertion */
+}
+
+/* Free every message still queued on enic->admin_msg_list.
+ *
+ * The list is emptied with admin_msg_lock held (BH disabled). Messages
+ * are discarded without being passed to the RQ handler.
+ */
+static void enic_admin_msg_drain(struct enic *enic)
+{
+	struct enic_admin_msg *stale;
+
+	spin_lock_bh(&enic->admin_msg_lock);
+	while (!list_empty(&enic->admin_msg_list)) {
+		stale = list_first_entry(&enic->admin_msg_list,
+					 struct enic_admin_msg, list);
+		list_del(&stale->list);
+		kfree(stale);
+	}
+	spin_unlock_bh(&enic->admin_msg_lock);
}

int enic_admin_channel_open(struct enic *enic)
@@ -220,6 +481,18 @@ int enic_admin_channel_open(struct enic *enic)
return err;
}

+ spin_lock_init(&enic->admin_msg_lock);
+ INIT_LIST_HEAD(&enic->admin_msg_list);
+ INIT_WORK(&enic->admin_msg_work, enic_admin_msg_work_handler);
+
+ err = enic_admin_setup_intr(enic);
+ if (err) {
+ netdev_err(enic->netdev,
+ "Admin channel requires MSI-X, SR-IOV unavailable: %d\n",
+ err);
+ goto free_resources;
+ }
+
enic_admin_init_resources(enic);

vnic_wq_enable(&enic->admin_wq);
@@ -239,15 +512,29 @@ int enic_admin_channel_open(struct enic *enic)
goto disable_queues;
}

+ vnic_intr_unmask(&enic->admin_intr);
+
+ netdev_dbg(enic->netdev,
+ "admin channel open: intr=%u wq_avail=%u rq_avail=%u cq0_color=%u cq1_color=%u\n",
+ enic->admin_intr_index,
+ vnic_wq_desc_avail(&enic->admin_wq),
+ vnic_rq_desc_avail(&enic->admin_rq),
+ enic->admin_cq[0].last_color,
+ enic->admin_cq[1].last_color);
+
return 0;

disable_queues:
+ enic_admin_teardown_intr(enic);
enic_admin_qp_type_set(enic, QP_DISABLE);
if (vnic_wq_disable(&enic->admin_wq))
netdev_warn(enic->netdev, "Failed to disable admin WQ\n");
if (vnic_rq_disable(&enic->admin_rq))
netdev_warn(enic->netdev, "Failed to disable admin RQ\n");
+ cancel_work_sync(&enic->admin_msg_work);
+ enic_admin_msg_drain(enic);
enic_admin_rq_drain(enic);
+free_resources:
enic_admin_free_resources(enic);
return err;
}
@@ -259,6 +546,13 @@ void enic_admin_channel_close(struct enic *enic)
if (!enic->has_admin_channel)
return;

+ netdev_dbg(enic->netdev, "admin channel close\n");
+
+ vnic_intr_mask(&enic->admin_intr);
+ enic_admin_teardown_intr(enic);
+ cancel_work_sync(&enic->admin_msg_work);
+ enic_admin_msg_drain(enic);
+
enic_admin_qp_type_set(enic, QP_DISABLE);

err = vnic_wq_disable(&enic->admin_wq);
@@ -274,5 +568,6 @@ void enic_admin_channel_close(struct enic *enic)
enic_admin_rq_drain(enic);
vnic_cq_clean(&enic->admin_cq[0]);
vnic_cq_clean(&enic->admin_cq[1]);
+ vnic_intr_clean(&enic->admin_intr);
enic_admin_free_resources(enic);
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.h b/drivers/net/ethernet/cisco/enic/enic_admin.h
index 569aadeb9312..0e52d6e0586e 100644
--- a/drivers/net/ethernet/cisco/enic/enic_admin.h
+++ b/drivers/net/ethernet/cisco/enic/enic_admin.h
@@ -9,7 +9,19 @@

struct enic;

+/* Wrapper for received admin messages queued for deferred processing.
+ * NAPI enqueues these; a workqueue handler processes them in process context
+ * where sleeping (mutex, GFP_KERNEL) is safe.
+ *
+ * @list: linkage on the per-device pending message list
+ * @len:  number of payload bytes stored in @data; it is the
+ *        __counted_by() bound, so it must be assigned before @data
+ *        is accessed
+ * @data: copy of the received payload, 8-byte aligned
+ */
+struct enic_admin_msg {
+	struct list_head list;
+	unsigned int len;
+	u8 data[] __aligned(8) __counted_by(len);
+};
+
int enic_admin_channel_open(struct enic *enic);
void enic_admin_channel_close(struct enic *enic);
+unsigned int enic_admin_wq_cq_service(struct enic *enic);
+unsigned int enic_admin_rq_cq_service(struct enic *enic, unsigned int budget);

#endif /* _ENIC_ADMIN_H_ */

--
2.43.0