[PATCH v2 04/14] PCI/AER: Modify AER driver logging to report CXL or PCIe bus error type

From: Terry Bowman
Date: Fri Oct 25 2024 - 17:05:23 EST


The AER driver and aer_event tracing currently log 'PCIe Bus Type'
for all errors.

Update the driver and aer_event tracing to log 'CXL Bus Type' for CXL devices.

Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
---
drivers/pci/pcie/aer.c | 14 ++++++++------
include/ras/ras_event.h | 9 ++++++---
2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index fe6edf26279e..53e9a11f6c0f 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -699,13 +699,14 @@ static void __aer_print_error(struct pci_dev *dev,

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
+ const char *bus_type = pcie_is_cxl(dev) ? "CXL" : "PCIe";
int layer, agent;
int id = pci_dev_id(dev);
const char *level;

if (!info->status) {
- pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
- aer_error_severity_string[info->severity]);
+ pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
+ bus_type, aer_error_severity_string[info->severity]);
goto out;
}

@@ -714,8 +715,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)

level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;

- pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
- aer_error_severity_string[info->severity],
+ pci_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
+ bus_type, aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);

pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
@@ -730,7 +731,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
if (info->id && info->error_dev_num > 1 && info->id == id)
pci_err(dev, " Error of this Agent is reported first\n");

- trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+ trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
info->severity, info->tlp_header_valid, &info->tlp);
}

@@ -764,6 +765,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
void pci_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
+ const char *bus_type = pcie_is_cxl(dev) ? "CXL" : "PCIe";
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
struct aer_err_info info;
@@ -798,7 +800,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
if (tlp_header_valid)
__print_tlp_header(dev, &aer->header_log);

- trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+ trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
aer_severity, tlp_header_valid, &aer->header_log);
}
EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index e5f7ee0864e7..1bf8e7050ba8 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,

TRACE_EVENT(aer_event,
TP_PROTO(const char *dev_name,
+ const char *bus_type,
const u32 status,
const u8 severity,
const u8 tlp_header_valid,
struct pcie_tlp_log *tlp),

- TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
+ TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),

TP_STRUCT__entry(
__string( dev_name, dev_name )
+ __string( bus_type, bus_type )
__field( u32, status )
__field( u8, severity )
__field( u8, tlp_header_valid)
@@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,

TP_fast_assign(
__assign_str(dev_name);
+ __assign_str(bus_type);
__entry->status = status;
__entry->severity = severity;
__entry->tlp_header_valid = tlp_header_valid;
@@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
}
),

- TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
- __get_str(dev_name),
+ TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
+ __get_str(dev_name), __get_str(bus_type),
__entry->severity == AER_CORRECTABLE ? "Corrected" :
__entry->severity == AER_FATAL ?
"Fatal" : "Uncorrected, non-fatal",
--
2.34.1