[PATCH v4 02/12] PCI: endpoint: pci-epf-vntb: Defer pci_epc_raise_irq() out of atomic context

From: Koichiro Den

Date: Tue May 12 2026 - 22:53:00 EST


The NTB .peer_db_set() callback may be invoked from atomic context.
pci-epf-vntb currently calls pci_epc_raise_irq() directly, but
pci_epc_raise_irq() may sleep (it takes epc->lock).

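Avoid sleeping in atomic context by coalescing doorbell bits into an
atomic64 pending mask and raising MSIs from a work item. Limit the
amount of work per run to avoid monopolizing the workqueue under a
doorbell storm.

As a consequence, .peer_db_set() now returns 0 once the request has been
queued; a failure to raise an MSI is only logged from the work item. The
work item also raises one MSI per pending doorbell bit, whereas the old
code raised a single MSI for the lowest set bit only.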

Clear stale pending bits before enabling the work item and after disabling
it during cleanup. Also mask requested doorbells against the currently
valid doorbell mask before queueing work, and iterate the pending u64 with
__ffs64() so high doorbell bits are handled correctly.

Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
---
Changes since v3:
- Clear peer_db_pending before enabling and after disabling the work item.
- Mask peer_db_set() requests with db_valid_mask before queueing work.
- Replace ffs() iteration with u64-safe __ffs64() iteration.
- Did not pick up Frank's Reviewed-by tag from v3 02/10 because of the
  added behavior.

drivers/pci/endpoint/functions/pci-epf-vntb.c | 112 +++++++++++++-----
1 file changed, 85 insertions(+), 27 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 668d25abc7f2..cc0b356973f3 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -37,6 +37,7 @@
*/

#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/module.h>
@@ -69,6 +70,9 @@ static struct workqueue_struct *kpcintb_workqueue;
#define MAX_DB_COUNT 32
#define MAX_MW 4

+/* Limit per-work execution to avoid monopolizing kworker on doorbell storms. */
+#define VNTB_PEER_DB_WORK_BUDGET 5
+
enum epf_ntb_bar {
BAR_CONFIG,
BAR_DB,
@@ -129,6 +133,8 @@ struct epf_ntb {
u32 spad_count;
u64 mws_size[MAX_MW];
atomic64_t db;
+ atomic64_t peer_db_pending;
+ struct work_struct peer_db_work;
u32 vbus_number;
u16 vntb_pid;
u16 vntb_vid;
@@ -972,6 +978,9 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
INIT_DELAYED_WORK(&ntb->cmd_handler, epf_ntb_cmd_handler);
queue_work(kpcintb_workqueue, &ntb->cmd_handler.work);

+ atomic64_set(&ntb->peer_db_pending, 0);
+ enable_work(&ntb->peer_db_work);
+
return 0;

err_write_header:
@@ -995,6 +1004,8 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
{
disable_delayed_work_sync(&ntb->cmd_handler);
+ disable_work_sync(&ntb->peer_db_work);
+ atomic64_set(&ntb->peer_db_pending, 0);
epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
epf_ntb_db_bar_clear(ntb);
epf_ntb_config_sspad_bar_clear(ntb);
@@ -1409,41 +1420,84 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
return 0;
}

+static void vntb_epf_peer_db_work(struct work_struct *work)
+{
+ struct epf_ntb *ntb = container_of(work, struct epf_ntb, peer_db_work);
+ struct pci_epf *epf = ntb->epf;
+ unsigned int budget = VNTB_PEER_DB_WORK_BUDGET;
+ u8 func_no, vfunc_no;
+ unsigned int db_bit;
+ u32 interrupt_num;
+ u64 db_bits;
+ int ret;
+
+ if (!epf || !epf->epc)
+ return;
+
+ func_no = epf->func_no;
+ vfunc_no = epf->vfunc_no;
+
+ /*
+ * Drain doorbells from peer_db_pending in snapshots (atomic64_xchg()).
+ * Limit the number of snapshots handled per run so we don't monopolize
+ * the workqueue under a doorbell storm.
+ */
+ while (budget--) {
+ db_bits = atomic64_xchg(&ntb->peer_db_pending, 0);
+ if (!db_bits)
+ return;
+
+ while (db_bits) {
+ /*
+ * pci_epc_raise_irq() for MSI expects a 1-based
+ * interrupt number. db_bit is zero-based, so add 3 to
+ * preserve the historical slot offset.
+ *
+ * Legacy mapping (kept for compatibility):
+ *
+ * MSI #1 : link event (reserved)
+ * MSI #2 : unused (historical offset)
+ * MSI #3 : doorbell bit 0 (DB#0)
+ * MSI #4 : doorbell bit 1 (DB#1)
+ * ...
+ *
+ * Do not change this mapping to avoid breaking
+ * interoperability with older peers.
+ */
+ db_bit = __ffs64(db_bits);
+ interrupt_num = db_bit + 3;
+ db_bits &= ~BIT_ULL(db_bit);
+
+ ret = pci_epc_raise_irq(epf->epc, func_no, vfunc_no,
+ PCI_IRQ_MSI, interrupt_num);
+ if (ret)
+ dev_err(&ntb->ntb.dev,
+ "Failed to raise IRQ for interrupt_num %u: %d\n",
+ interrupt_num, ret);
+ }
+ }
+
+ if (atomic64_read(&ntb->peer_db_pending))
+ queue_work(kpcintb_workqueue, &ntb->peer_db_work);
+}
+
static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
{
- u32 interrupt_num = ffs(db_bits) + 1;
struct epf_ntb *ntb = ntb_ndev(ndev);
- u8 func_no, vfunc_no;
- int ret;

- func_no = ntb->epf->func_no;
- vfunc_no = ntb->epf->vfunc_no;
+ db_bits &= vntb_epf_db_valid_mask(ndev);
+ if (!db_bits)
+ return 0;

/*
- * pci_epc_raise_irq() for MSI expects a 1-based interrupt number.
- * ffs() returns a 1-based index (bit 0 -> 1). interrupt_num has already
- * been computed as ffs(db_bits) + 1 above. Adding one more +1 when
- * calling pci_epc_raise_irq() therefore results in:
- *
- * doorbell bit 0 -> MSI #3
- *
- * Legacy mapping (kept for compatibility):
- *
- * MSI #1 : link event (reserved)
- * MSI #2 : unused (historical offset)
- * MSI #3 : doorbell bit 0 (DB#0)
- * MSI #4 : doorbell bit 1 (DB#1)
- * ...
- *
- * Do not change this mapping to avoid breaking interoperability with
- * older peers.
+ * .peer_db_set() may be called from atomic context. pci_epc_raise_irq()
+ * can sleep (it takes epc->lock), so defer MSI raising to process
+ * context. Doorbell requests are coalesced in peer_db_pending.
*/
- ret = pci_epc_raise_irq(ntb->epf->epc, func_no, vfunc_no,
- PCI_IRQ_MSI, interrupt_num + 1);
- if (ret)
- dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
+ atomic64_or(db_bits, &ntb->peer_db_pending);
+ queue_work(kpcintb_workqueue, &ntb->peer_db_work);

- return ret;
+ return 0;
}

static u64 vntb_epf_db_read(struct ntb_dev *ndev)
@@ -1690,6 +1744,10 @@ static int epf_ntb_probe(struct pci_epf *epf,
ntb->epf = epf;
ntb->vbus_number = 0xff;

+ INIT_WORK(&ntb->peer_db_work, vntb_epf_peer_db_work);
+ disable_work(&ntb->peer_db_work);
+ atomic64_set(&ntb->peer_db_pending, 0);
+
/* Initially, no bar is assigned */
for (i = 0; i < VNTB_BAR_NUM; i++)
ntb->epf_ntb_bar[i] = NO_BAR;
--
2.51.0