Re: [PATCH v2] PCI/DPC: Fix shared interrupt handling

From: Bjorn Helgaas
Date: Wed Jan 10 2018 - 16:45:54 EST


On Thu, Dec 14, 2017 at 08:20:18AM -0700, Alex Williamson wrote:
> DPC supports shared interrupts, but it plays very loosely with testing
> whether the interrupt is generated by DPC before generating spurious
> log messages, such as:
>
> dpc 0000:10:01.2:pcie010: DPC containment event, status:0x1f00 source:0x0000
>
> Testing the status register for zero or -1 is not sufficient when the
> device supports the RP PIO First Error Pointer register. Change this
> to test whether the interrupt is enabled in the control register,
> retaining the device present test, and that the status reports the
> interrupt as signaled and DPC is triggered, clearing as a spurious
> interrupt otherwise.
>
> Additionally, since the interrupt is actually serviced by a workqueue,
> disable the interrupt in the control register until that completes or
> else we may never see it execute due to further incoming interrupts.
> A software generated DPC floods the system otherwise.
>
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>

Applied with Keith's reviewed-by on pci/dpc for v4.16, thanks!

> ---
>
> v2: Fix interrupt re-enable as spotted by Keith, tested multiple
> injections via software trigger.
>
> drivers/pci/pcie/pcie-dpc.c | 60 +++++++++++++++++++++++++++++--------------
> 1 file changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
> index 2d976a623ddc..f7cf5ae7dec2 100644
> --- a/drivers/pci/pcie/pcie-dpc.c
> +++ b/drivers/pci/pcie/pcie-dpc.c
> @@ -109,6 +109,7 @@ static void interrupt_event_handler(struct work_struct *work)
> struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
> struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
> struct pci_bus *parent = pdev->subordinate;
> + u16 ctl;
>
> pci_lock_rescan_remove();
> list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
> @@ -135,6 +136,10 @@ static void interrupt_event_handler(struct work_struct *work)
>
> pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
> PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
> +
> + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
> + ctl | PCI_EXP_DPC_CTL_INT_EN);
> }
>
> static void dpc_rp_pio_print_tlp_header(struct device *dev,
> @@ -249,34 +254,49 @@ static irqreturn_t dpc_irq(int irq, void *context)
> struct dpc_dev *dpc = (struct dpc_dev *)context;
> struct pci_dev *pdev = dpc->dev->port;
> struct device *dev = &dpc->dev->device;
> - u16 status, source;
> + u16 ctl, status, source, reason, ext_reason;
> +
> + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> +
> + if (!(ctl & PCI_EXP_DPC_CTL_INT_EN) || ctl == (u16)(~0))
> + return IRQ_NONE;
>
> pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status);
> +
> + if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT))
> + return IRQ_NONE;
> +
> + if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
> + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
> + PCI_EXP_DPC_STATUS_INTERRUPT);
> + return IRQ_HANDLED;
> + }
> +
> + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
> + ctl & ~PCI_EXP_DPC_CTL_INT_EN);
> +
> pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_SOURCE_ID,
> &source);
> - if (!status || status == (u16)(~0))
> - return IRQ_NONE;
>
> dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n",
> status, source);
>
> - if (status & PCI_EXP_DPC_STATUS_TRIGGER) {
> - u16 reason = (status >> 1) & 0x3;
> - u16 ext_reason = (status >> 5) & 0x3;
> -
> - dev_warn(dev, "DPC %s detected, remove downstream devices\n",
> - (reason == 0) ? "unmasked uncorrectable error" :
> - (reason == 1) ? "ERR_NONFATAL" :
> - (reason == 2) ? "ERR_FATAL" :
> - (ext_reason == 0) ? "RP PIO error" :
> - (ext_reason == 1) ? "software trigger" :
> - "reserved error");
> - /* show RP PIO error detail information */
> - if (reason == 3 && ext_reason == 0)
> - dpc_process_rp_pio_error(dpc);
> -
> - schedule_work(&dpc->work);
> - }
> + reason = (status >> 1) & 0x3;
> + ext_reason = (status >> 5) & 0x3;
> +
> + dev_warn(dev, "DPC %s detected, remove downstream devices\n",
> + (reason == 0) ? "unmasked uncorrectable error" :
> + (reason == 1) ? "ERR_NONFATAL" :
> + (reason == 2) ? "ERR_FATAL" :
> + (ext_reason == 0) ? "RP PIO error" :
> + (ext_reason == 1) ? "software trigger" :
> + "reserved error");
> + /* show RP PIO error detail information */
> + if (reason == 3 && ext_reason == 0)
> + dpc_process_rp_pio_error(dpc);
> +
> + schedule_work(&dpc->work);
> +
> return IRQ_HANDLED;
> }
>
>