[PATCH 01/15] cxl/aer/pci: Add CXL PCIe port error handler callbacks in AER service driver

From: Terry Bowman
Date: Tue Oct 08 2024 - 18:17:51 EST


CXL protocol errors are reported to the OS through PCIe correctable and
uncorrectable internal errors. However, since CXL PCIe port devices
are currently bound to the portdrv driver, there is no mechanism to
notify the CXL driver of these errors, and that notification is
required for proper logging and handling.

To address this, introduce CXL PCIe port error callbacks along with
register/unregister and accessor functions. The AER driver will invoke
the callbacks when a CXL port device reports protocol errors.

The AER driver callbacks will be used in future patches implementing
CXL PCIe port error handling.

Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
---
drivers/pci/pcie/aer.c | 22 ++++++++++++++++++++++
include/linux/aer.h | 14 ++++++++++++++
2 files changed, 36 insertions(+)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 13b8586924ea..a9792b9576b4 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -50,6 +50,8 @@ struct aer_rpc {
DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};

+static struct cxl_port_err_hndlrs cxl_port_hndlrs; /* single file-scope callback table; static storage, so both callbacks start out NULL */
+
/* AER stats for the device */
struct aer_stats {

@@ -1078,6 +1080,26 @@ static inline void cxl_rch_handle_error(struct pci_dev *dev,
struct aer_err_info *info) { }
#endif

+void register_cxl_port_hndlrs(struct cxl_port_err_hndlrs *_cxl_port_hndlrs)
+{ /* Copy the caller's two callbacks into the file-scope cxl_port_hndlrs table. */
+ cxl_port_hndlrs.error_detected = _cxl_port_hndlrs->error_detected; /* NOTE(review): argument is dereferenced without a NULL check */
+ cxl_port_hndlrs.cor_error_detected = _cxl_port_hndlrs->cor_error_detected; /* NOTE(review): plain stores, no locking in this function */
+}
+EXPORT_SYMBOL_NS_GPL(register_cxl_port_hndlrs, CXL); /* exported in the CXL symbol namespace */
+
+void unregister_cxl_port_hndlrs(void)
+{ /* Reset both callbacks in the file-scope cxl_port_hndlrs table to NULL. */
+ cxl_port_hndlrs.error_detected = NULL; /* drop the uncorrectable-error callback */
+ cxl_port_hndlrs.cor_error_detected = NULL; /* drop the corrected-error callback */
+}
+EXPORT_SYMBOL_NS_GPL(unregister_cxl_port_hndlrs, CXL); /* exported in the CXL symbol namespace */
+
+struct cxl_port_err_hndlrs *find_cxl_port_hndlrs(void)
+{ /* Accessor for the file-scope callback table. */
+ return &cxl_port_hndlrs; /* address of a static object, so never NULL; the callbacks inside may be NULL */
+}
+EXPORT_SYMBOL_NS_GPL(find_cxl_port_hndlrs, CXL); /* exported in the CXL symbol namespace */
+
/**
* pci_aer_handle_error - handle logging error into an event log
* @dev: pointer to pci_dev data structure of error source device
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 4b97f38f3fcf..67fd04c5ae2b 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -10,6 +10,7 @@

#include <linux/errno.h>
#include <linux/types.h>
+#include <linux/pci.h>

#define AER_NONFATAL 0
#define AER_FATAL 1
@@ -55,5 +56,18 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity, struct aer_capability_regs *aer_regs);
+
+struct cxl_port_err_hndlrs { /* callback table for CXL port errors; either member may be NULL */
+
+ /* CXL uncorrectable error detected on this device */
+ pci_ers_result_t (*error_detected)(struct pci_dev *dev,
+ pci_channel_state_t error); /* receives the device and a channel state, returns a pci_ers_result_t */
+
+ /* CXL corrected error detected on this device */
+ void (*cor_error_detected)(struct pci_dev *dev); /* receives only the device; no return value */
+};
+void register_cxl_port_hndlrs(struct cxl_port_err_hndlrs *_cxl_port_hndlrs); /* install callbacks copied from the given table */
+void unregister_cxl_port_hndlrs(void); /* clear the installed callbacks */
+struct cxl_port_err_hndlrs *find_cxl_port_hndlrs(void); /* get a pointer to the installed callback table */
#endif //_AER_H_

--
2.34.1