Re: [PATCH 02/10] PCI: endpoint: pci-epf-vntb: Defer pci_epc_raise_irq() out of atomic context
From: Koichiro Den
Date: Wed Feb 25 2026 - 20:04:24 EST
On Wed, Feb 25, 2026 at 03:36:05PM -0500, Frank Li wrote:
> On Tue, Feb 24, 2026 at 10:34:51PM +0900, Koichiro Den wrote:
> > The NTB .peer_db_set() callback may be invoked from atomic context.
> > pci-epf-vntb currently calls pci_epc_raise_irq() directly, but
> > pci_epc_raise_irq() may sleep (it takes epc->lock).
>
> Can we use thread irq handle()?
I don't think so. ntb_peer_db_set() can be invoked from softirq context.
Thanks for the review,
Koichiro
>
> Frank
> >
> > Avoid sleeping in atomic context by coalescing doorbell bits into an
> > atomic64 pending mask and raising MSIs from a work item. Limit the
> > amount of work per run to avoid monopolizing the workqueue under a
> > doorbell storm.
> >
> > Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
> > Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
> > ---
> > drivers/pci/endpoint/functions/pci-epf-vntb.c | 109 +++++++++++++-----
> > 1 file changed, 81 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > index 4328f05acfe6..c00898ac8269 100644
> > --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> > @@ -69,6 +69,9 @@ static struct workqueue_struct *kpcintb_workqueue;
> > #define MAX_DB_COUNT 32
> > #define MAX_MW 4
> >
> > +/* Limit per-work execution to avoid monopolizing kworker on doorbell storms. */
> > +#define VNTB_PEER_DB_WORK_BUDGET 5
> > +
> > enum epf_ntb_bar {
> > BAR_CONFIG,
> > BAR_DB,
> > @@ -129,6 +132,8 @@ struct epf_ntb {
> > u32 spad_count;
> > u64 mws_size[MAX_MW];
> > atomic64_t db;
> > + atomic64_t peer_db_pending;
> > + struct work_struct peer_db_work;
> > u32 vbus_number;
> > u16 vntb_pid;
> > u16 vntb_vid;
> > @@ -933,6 +938,8 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
> > INIT_DELAYED_WORK(&ntb->cmd_handler, epf_ntb_cmd_handler);
> > queue_work(kpcintb_workqueue, &ntb->cmd_handler.work);
> >
> > + enable_work(&ntb->peer_db_work);
> > +
> > return 0;
> >
> > err_write_header:
> > @@ -955,6 +962,7 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
> > */
> > static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
> > {
> > + disable_work_sync(&ntb->peer_db_work);
> > epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
> > epf_ntb_db_bar_clear(ntb);
> > epf_ntb_config_sspad_bar_clear(ntb);
> > @@ -1365,41 +1373,82 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
> > return 0;
> > }
> >
> > +static void vntb_epf_peer_db_work(struct work_struct *work)
> > +{
> > + struct epf_ntb *ntb = container_of(work, struct epf_ntb, peer_db_work);
> > + struct pci_epf *epf = ntb->epf;
> > + unsigned int budget = VNTB_PEER_DB_WORK_BUDGET;
> > + u8 func_no, vfunc_no;
> > + u32 interrupt_num;
> > + u64 db_bits;
> > + int ret;
> > +
> > + if (!epf || !epf->epc)
> > + return;
> > +
> > + func_no = epf->func_no;
> > + vfunc_no = epf->vfunc_no;
> > +
> > + /*
> > + * Drain doorbells from peer_db_pending in snapshots (atomic64_xchg()).
> > + * Limit the number of snapshots handled per run so we don't monopolize
> > + * the workqueue under a doorbell storm.
> > + */
> > + while (budget--) {
> > + db_bits = atomic64_xchg(&ntb->peer_db_pending, 0);
> > + if (!db_bits)
> > + return;
> > +
> > + while (db_bits) {
> > + /*
> > + * pci_epc_raise_irq() for MSI expects a 1-based
> > + * interrupt number. ffs() returns a 1-based index (bit
> > + * 0 -> 1). interrupt_num has already been computed as
> > + * ffs(db_bits) + 1 above. Adding one more +1 when
> > + * calling pci_epc_raise_irq() therefore results in:
> > + *
> > + * doorbell bit 0 -> MSI #3
> > + *
> > + * Legacy mapping (kept for compatibility):
> > + *
> > + * MSI #1 : link event (reserved)
> > + * MSI #2 : unused (historical offset)
> > + * MSI #3 : doorbell bit 0 (DB#0)
> > + * MSI #4 : doorbell bit 1 (DB#1)
> > + * ...
> > + *
> > + * Do not change this mapping to avoid breaking
> > + * interoperability with older peers.
> > + */
> > + interrupt_num = ffs(db_bits) + 2;
> > + db_bits &= db_bits - 1;
> > +
> > + ret = pci_epc_raise_irq(epf->epc, func_no, vfunc_no,
> > + PCI_IRQ_MSI, interrupt_num);
> > + if (ret)
> > + dev_err(&ntb->ntb.dev,
> > + "Failed to raise IRQ for interrupt_num %u: %d\n",
> > + interrupt_num, ret);
> > + }
> > + }
> > +
> > + if (atomic64_read(&ntb->peer_db_pending))
> > + queue_work(kpcintb_workqueue, &ntb->peer_db_work);
> > +}
> > +
> > static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
> > {
> > - u32 interrupt_num = ffs(db_bits) + 1;
> > struct epf_ntb *ntb = ntb_ndev(ndev);
> > - u8 func_no, vfunc_no;
> > - int ret;
> > -
> > - func_no = ntb->epf->func_no;
> > - vfunc_no = ntb->epf->vfunc_no;
> >
> > /*
> > - * pci_epc_raise_irq() for MSI expects a 1-based interrupt number.
> > - * ffs() returns a 1-based index (bit 0 -> 1). interrupt_num has already
> > - * been computed as ffs(db_bits) + 1 above. Adding one more +1 when
> > - * calling pci_epc_raise_irq() therefore results in:
> > - *
> > - * doorbell bit 0 -> MSI #3
> > - *
> > - * Legacy mapping (kept for compatibility):
> > - *
> > - * MSI #1 : link event (reserved)
> > - * MSI #2 : unused (historical offset)
> > - * MSI #3 : doorbell bit 0 (DB#0)
> > - * MSI #4 : doorbell bit 1 (DB#1)
> > - * ...
> > - *
> > - * Do not change this mapping to avoid breaking interoperability with
> > - * older peers.
> > + * .peer_db_set() may be called from atomic context. pci_epc_raise_irq()
> > + * can sleep (it takes epc->lock), so defer MSI raising to process
> > + * context. Doorbell requests are coalesced in peer_db_pending.
> > */
> > - ret = pci_epc_raise_irq(ntb->epf->epc, func_no, vfunc_no,
> > - PCI_IRQ_MSI, interrupt_num + 1);
> > - if (ret)
> > - dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
> > + atomic64_or(db_bits, &ntb->peer_db_pending);
> > + queue_work(kpcintb_workqueue, &ntb->peer_db_work);
> >
> > - return ret;
> > + return 0;
> > }
> >
> > static u64 vntb_epf_db_read(struct ntb_dev *ndev)
> > @@ -1641,6 +1690,10 @@ static int epf_ntb_probe(struct pci_epf *epf,
> > ntb->epf = epf;
> > ntb->vbus_number = 0xff;
> >
> > + INIT_WORK(&ntb->peer_db_work, vntb_epf_peer_db_work);
> > + disable_work(&ntb->peer_db_work);
> > + atomic64_set(&ntb->peer_db_pending, 0);
> > +
> > /* Initially, no bar is assigned */
> > for (i = 0; i < VNTB_BAR_NUM; i++)
> > ntb->epf_ntb_bar[i] = NO_BAR;
> > --
> > 2.51.0
> >