Re: [PATCH v2 02/10] PCI: endpoint: pci-epf-vntb: Defer pci_epc_raise_irq() out of atomic context

From: Frank Li

Date: Fri Feb 27 2026 - 10:55:48 EST


On Fri, Feb 27, 2026 at 05:49:47PM +0900, Koichiro Den wrote:
> The NTB .peer_db_set() callback may be invoked from atomic context.
> pci-epf-vntb currently calls pci_epc_raise_irq() directly, but
> pci_epc_raise_irq() may sleep (it takes epc->lock).
>
> Avoid sleeping in atomic context by coalescing doorbell bits into an
> atomic64 pending mask and raising MSIs from a work item. Limit the
> amount of work per run to avoid monopolizing the workqueue under a
> doorbell storm.
>
> Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
> Signed-off-by: Koichiro Den <den@xxxxxxxxxxxxx>
> ---
> Changes since v1:
> - No functional changes.
> - Updated comment in vntb_epf_peer_db_work().
>
> drivers/pci/endpoint/functions/pci-epf-vntb.c | 106 +++++++++++++-----
> 1 file changed, 78 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> index 83372aba5106..e2c0b6dba793 100644
> --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
> +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
> @@ -69,6 +69,9 @@ static struct workqueue_struct *kpcintb_workqueue;
> #define MAX_DB_COUNT 32
> #define MAX_MW 4
>
> +/* Limit per-work execution to avoid monopolizing kworker on doorbell storms. */
> +#define VNTB_PEER_DB_WORK_BUDGET 5
> +
> enum epf_ntb_bar {
> BAR_CONFIG,
> BAR_DB,
> @@ -129,6 +132,8 @@ struct epf_ntb {
> u32 spad_count;
> u64 mws_size[MAX_MW];
> atomic64_t db;
> + atomic64_t peer_db_pending;
> + struct work_struct peer_db_work;
> u32 vbus_number;
> u16 vntb_pid;
> u16 vntb_vid;
> @@ -933,6 +938,8 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
> INIT_DELAYED_WORK(&ntb->cmd_handler, epf_ntb_cmd_handler);
> queue_work(kpcintb_workqueue, &ntb->cmd_handler.work);
>
> + enable_work(&ntb->peer_db_work);
> +
> return 0;
>
> err_write_header:
> @@ -955,6 +962,7 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
> */
> static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
> {
> + disable_work_sync(&ntb->peer_db_work);
> epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
> epf_ntb_db_bar_clear(ntb);
> epf_ntb_config_sspad_bar_clear(ntb);
> @@ -1369,41 +1377,79 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
> return 0;
> }
>
> +static void vntb_epf_peer_db_work(struct work_struct *work)
> +{
> + struct epf_ntb *ntb = container_of(work, struct epf_ntb, peer_db_work);
> + struct pci_epf *epf = ntb->epf;
> + unsigned int budget = VNTB_PEER_DB_WORK_BUDGET;
> + u8 func_no, vfunc_no;
> + u32 interrupt_num;
> + u64 db_bits;
> + int ret;
> +
> + if (!epf || !epf->epc)
> + return;
> +
> + func_no = epf->func_no;
> + vfunc_no = epf->vfunc_no;
> +
> + /*
> + * Drain doorbells from peer_db_pending in snapshots (atomic64_xchg()).
> + * Limit the number of snapshots handled per run so we don't monopolize
> + * the workqueue under a doorbell storm.
> + */
> + while (budget--) {
> + db_bits = atomic64_xchg(&ntb->peer_db_pending, 0);
> + if (!db_bits)
> + return;
> +
> + while (db_bits) {
> + /*
> + * pci_epc_raise_irq() for MSI expects a 1-based
> + * interrupt number. ffs() returns a 1-based index (bit
> + * 0 -> 1). We historically add +2 to compute
> + * interrupt_num.
> + *
> + * Legacy mapping (kept for compatibility):
> + *
> + * MSI #1 : link event (reserved)
> + * MSI #2 : unused (historical offset)
> + * MSI #3 : doorbell bit 0 (DB#0)
> + * MSI #4 : doorbell bit 1 (DB#1)
> + * ...
> + *
> + * Do not change this mapping to avoid breaking
> + * interoperability with older peers.
> + */
> + interrupt_num = ffs(db_bits) + 2;
> + db_bits &= db_bits - 1;
> +
> + ret = pci_epc_raise_irq(epf->epc, func_no, vfunc_no,
> + PCI_IRQ_MSI, interrupt_num);
> + if (ret)
> + dev_err(&ntb->ntb.dev,
> + "Failed to raise IRQ for interrupt_num %u: %d\n",
> + interrupt_num, ret);
> + }
> + }
> +
> + if (atomic64_read(&ntb->peer_db_pending))
> + queue_work(kpcintb_workqueue, &ntb->peer_db_work);
> +}
> +
> static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
> {
> - u32 interrupt_num = ffs(db_bits) + 1;
> struct epf_ntb *ntb = ntb_ndev(ndev);
> - u8 func_no, vfunc_no;
> - int ret;
> -
> - func_no = ntb->epf->func_no;
> - vfunc_no = ntb->epf->vfunc_no;
>
> /*
> - * pci_epc_raise_irq() for MSI expects a 1-based interrupt number.
> - * ffs() returns a 1-based index (bit 0 -> 1). interrupt_num has already
> - * been computed as ffs(db_bits) + 1 above. Adding one more +1 when
> - * calling pci_epc_raise_irq() therefore results in:
> - *
> - * doorbell bit 0 -> MSI #3
> - *
> - * Legacy mapping (kept for compatibility):
> - *
> - * MSI #1 : link event (reserved)
> - * MSI #2 : unused (historical offset)
> - * MSI #3 : doorbell bit 0 (DB#0)
> - * MSI #4 : doorbell bit 1 (DB#1)
> - * ...
> - *
> - * Do not change this mapping to avoid breaking interoperability with
> - * older peers.
> + * .peer_db_set() may be called from atomic context. pci_epc_raise_irq()
> + * can sleep (it takes epc->lock), so defer MSI raising to process
> + * context. Doorbell requests are coalesced in peer_db_pending.
> */
> - ret = pci_epc_raise_irq(ntb->epf->epc, func_no, vfunc_no,
> - PCI_IRQ_MSI, interrupt_num + 1);
> - if (ret)
> - dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
> + atomic64_or(db_bits, &ntb->peer_db_pending);
> + queue_work(kpcintb_workqueue, &ntb->peer_db_work);
>
> - return ret;
> + return 0;
> }
>
> static u64 vntb_epf_db_read(struct ntb_dev *ndev)
> @@ -1645,6 +1691,10 @@ static int epf_ntb_probe(struct pci_epf *epf,
> ntb->epf = epf;
> ntb->vbus_number = 0xff;
>
> + INIT_WORK(&ntb->peer_db_work, vntb_epf_peer_db_work);
> + disable_work(&ntb->peer_db_work);
> + atomic64_set(&ntb->peer_db_pending, 0);
> +

Does it need call disable_work_sync() in unbind or remove() function?

Frank

> /* Initially, no bar is assigned */
> for (i = 0; i < VNTB_BAR_NUM; i++)
> ntb->epf_ntb_bar[i] = NO_BAR;
> --
> 2.51.0
>