[PATCH v11 5/8] PCI/AER: Allow clearing Error Status Register in FF mode

From: sathyanarayanan . kuppuswamy
Date: Thu Dec 26 2019 - 19:41:56 EST


From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>

As per PCI firmware specification r3.2 System Firmware Intermediary
(SFI) _OSC and DPC Updates ECR
(https://members.pcisig.com/wg/PCI-SIG/document/13563), sec titled
"DPC Event Handling Implementation Note", page 10, Error Disconnect
Recover (EDR) support allows OS to handle error recovery and clearing
Error Registers even in FF mode. So create exception for FF mode checks
in pci_cleanup_aer_uncorrect_error_status(), pci_aer_clear_fatal_status()
and pci_cleanup_aer_error_status_regs() functions when its being called
from DPC code path.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>
Acked-by: Keith Busch <keith.busch@xxxxxxxxx>
---
Documentation/PCI/pcieaer-howto.rst | 2 +-
drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
drivers/ntb/hw/idt/ntb_hw_idt.c | 2 +-
drivers/pci/pci.c | 2 +-
drivers/pci/pci.h | 2 +-
drivers/pci/pcie/aer.c | 16 ++++++++--------
drivers/pci/pcie/dpc.c | 4 ++--
drivers/pci/pcie/err.c | 2 +-
drivers/scsi/lpfc/lpfc_attr.c | 2 +-
include/linux/aer.h | 12 ++++++++----
10 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
index 18bdefaafd1a..184c966b61cb 100644
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -243,7 +243,7 @@ messages to root port when an error is detected.

::

- int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);`
+ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, bool ff_check);

pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
error status register.
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 69bff085acf7..8378dbf3500b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3491,7 +3491,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
result = PCI_ERS_RESULT_DISCONNECT;
}

- err = pci_cleanup_aer_uncorrect_error_status(pdev);
+ err = pci_cleanup_aer_uncorrect_error_status(pdev, 1);
if (err)
dev_dbg(&pdev->dev,
"pci_cleanup_aer_uncorrect_error_status failed, error %d\n",
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index dcf234680535..9023308be2de 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -2675,7 +2675,7 @@ static int idt_init_pci(struct idt_ntb_dev *ndev)
if (ret != 0)
dev_warn(&pdev->dev, "PCIe AER capability disabled\n");
else /* Cleanup uncorrectable error status before getting to init */
- pci_cleanup_aer_uncorrect_error_status(pdev);
+ pci_cleanup_aer_uncorrect_error_status(pdev, 1);

/* First enable the PCI device */
ret = pcim_enable_device(pdev);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e87196cc1a7f..6264244d92bf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1499,7 +1499,7 @@ void pci_restore_state(struct pci_dev *dev)
pci_restore_rebar_state(dev);
pci_restore_dpc_state(dev);

- pci_cleanup_aer_error_status_regs(dev);
+ pci_cleanup_aer_error_status_regs(dev, 1);
pci_restore_aer_state(dev);

pci_restore_config_space(dev);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index a0a53bd05a0b..0b4452f72a9a 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -646,7 +646,7 @@ void pci_no_aer(void);
void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
extern const struct attribute_group aer_stats_attr_group;
-void pci_aer_clear_fatal_status(struct pci_dev *dev);
+void pci_aer_clear_fatal_status(struct pci_dev *dev, bool ff_check);
void pci_aer_clear_device_status(struct pci_dev *dev);
#else
static inline void pci_no_aer(void) { }
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 1ca86f2e0166..c64a91b347d2 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev)
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}

-int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev, bool ff_check)
{
int pos;
u32 status, sev;
@@ -385,7 +385,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
if (!pos)
return -EIO;

- if (pcie_aer_get_firmware_first(dev))
+ if (ff_check && pcie_aer_get_firmware_first(dev))
return -EIO;

/* Clear status bits for ERR_NONFATAL errors only */
@@ -399,7 +399,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);

-void pci_aer_clear_fatal_status(struct pci_dev *dev)
+void pci_aer_clear_fatal_status(struct pci_dev *dev, bool ff_check)
{
int pos;
u32 status, sev;
@@ -408,8 +408,8 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
if (!pos)
return;

- if (pcie_aer_get_firmware_first(dev))
- return;
+ if (ff_check && pcie_aer_get_firmware_first(dev))
+ return -EIO;

/* Clear status bits for ERR_FATAL errors only */
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
@@ -419,7 +419,7 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
}

-int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
+int pci_cleanup_aer_error_status_regs(struct pci_dev *dev, bool ff_check)
{
int pos;
u32 status;
@@ -432,7 +432,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
if (!pos)
return -EIO;

- if (pcie_aer_get_firmware_first(dev))
+ if (ff_check && pcie_aer_get_firmware_first(dev))
return -EIO;

port_type = pci_pcie_type(dev);
@@ -515,7 +515,7 @@ void pci_aer_init(struct pci_dev *dev)
n = pcie_cap_has_rtctl(dev) ? 5 : 4;
pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);

- pci_cleanup_aer_error_status_regs(dev);
+ pci_cleanup_aer_error_status_regs(dev, 1);
}

void pci_aer_exit(struct pci_dev *dev)
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index b19d707db222..e711a3747f48 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -280,8 +280,8 @@ static void dpc_process_error(struct dpc_dev *dpc)
dpc_get_aer_uncorrect_severity(pdev, &info) &&
aer_get_device_error_info(pdev, &info)) {
aer_print_error(pdev, &info);
- pci_cleanup_aer_uncorrect_error_status(pdev);
- pci_aer_clear_fatal_status(pdev);
+ pci_cleanup_aer_uncorrect_error_status(pdev, 0);
+ pci_aer_clear_fatal_status(pdev, 0);
}

/* We configure DPC so it only triggers on ERR_FATAL */
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 53cd9200ec2c..043eab17e45d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -236,7 +236,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
pci_walk_bus(bus, report_resume, &status);

pci_aer_clear_device_status(dev);
- pci_cleanup_aer_uncorrect_error_status(dev);
+ pci_cleanup_aer_uncorrect_error_status(dev, 1);
pci_info(dev, "AER: Device recovery successful\n");
return;

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4ff82b36a37a..5b37efd29e20 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -4810,7 +4810,7 @@ lpfc_aer_cleanup_state(struct device *dev, struct device_attribute *attr,
return -EINVAL;

if (phba->hba_flag & HBA_AER_ENABLED)
- rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev);
+ rc = pci_cleanup_aer_uncorrect_error_status(phba->pcidev, 1);

if (rc == 0)
return strlen(buf);
diff --git a/include/linux/aer.h b/include/linux/aer.h
index fa19e01f418a..f4f49df500ad 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -44,8 +44,10 @@ struct aer_capability_regs {
/* PCIe port driver needs this function to enable AER */
int pci_enable_pcie_error_reporting(struct pci_dev *dev);
int pci_disable_pcie_error_reporting(struct pci_dev *dev);
-int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
-int pci_cleanup_aer_error_status_regs(struct pci_dev *dev);
+int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev,
+ bool ff_check);
+int pci_cleanup_aer_error_status_regs(struct pci_dev *dev,
+ bool ff_check);
void pci_save_aer_state(struct pci_dev *dev);
void pci_restore_aer_state(struct pci_dev *dev);
#else
@@ -57,11 +59,13 @@ static inline int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
return -EINVAL;
}
-static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev,
+ bool ff_check)
{
return -EINVAL;
}
-static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
+static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev,
+ bool ff_check)
{
return -EINVAL;
}
--
2.21.0