[PATCH v17 03/11] cxl: Use common CPER handling for all CXL devices

From: Terry Bowman

Date: Tue May 05 2026 - 13:31:52 EST


Fold the Port and Endpoint specific paths in cxl_cper_handle_prot_err()
into a single code path. Drop the PCI type dispatch block as both Port
and Endpoint devices now go through the same code path.

Extend the pdev->dev.driver != NULL gate to Port devices, which previously
bypassed it. Together with the device lock taken via guard(device), this
check ensures the device's driver remains bound, and the device therefore
remains usable, for the duration of the error handling.

Recent trace event changes generalize the interface to take a
struct device * for all CXL devices. Update the Endpoint CPER path
to pass &pdev->dev (the PCI device) instead of &cxlmd->dev (the
memdev). This makes the trace event's "device=" field show the PCI
BDF for all CPER callers, replacing the prior "device=memN" output
for Endpoints. Userspace consumers correlating CPER trace events to
memdev names must map the PCI BDF back via /sys/bus/cxl/devices/.

Remove the bus_find_device(&cxl_bus_type, ..., match_memdev_by_parent)
lookup along with the match_memdev_by_parent() helper.

Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>

---

Changes in v16->v17:
- New commit
---
drivers/cxl/core/ras.c | 81 +++++++-----------------------------------
1 file changed, 13 insertions(+), 68 deletions(-)

diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index d7081caaf5d3..56611da8357a 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -8,65 +8,28 @@
#include <cxlpci.h>
#include "trace.h"

-static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, u64 serial,
+ struct cxl_ras_capability_regs *ras_cap)
{
- u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
+ u32 status = ras_cap->cor_status & ~ras_cap->cor_mask;

- trace_cxl_aer_correctable_error(&pdev->dev, status, pci_get_dsn(pdev));
+ trace_cxl_aer_correctable_error(&pdev->dev, status, serial);
}

-static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, u64 serial,
+ struct cxl_ras_capability_regs *ras_cap)
{
- u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
+ u32 status = ras_cap->uncor_status & ~ras_cap->uncor_mask;
u32 fe;

if (hweight32(status) > 1)
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
- ras_cap.cap_control));
+ ras_cap->cap_control));
else
fe = status;

trace_cxl_aer_uncorrectable_error(&pdev->dev, status, fe,
- ras_cap.header_log,
- pci_get_dsn(pdev));
-}
-
-static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
- struct cxl_memdev *cxlmd,
- struct cxl_ras_capability_regs ras_cap)
-{
- u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
-
- trace_cxl_aer_correctable_error(&cxlmd->dev, status,
- pci_get_dsn(pdev));
-}
-
-static void
-cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
- struct cxl_memdev *cxlmd,
- struct cxl_ras_capability_regs ras_cap)
-{
- u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
- u32 fe;
-
- if (hweight32(status) > 1)
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
- ras_cap.cap_control));
- else
- fe = status;
-
- trace_cxl_aer_uncorrectable_error(&cxlmd->dev, status, fe,
- ras_cap.header_log,
- pci_get_dsn(pdev));
-}
-
-static int match_memdev_by_parent(struct device *dev, const void *uport)
-{
- if (is_cxl_memdev(dev) && dev->parent == uport)
- return 1;
- return 0;
+ ras_cap->header_log, serial);
}

void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
@@ -77,38 +40,20 @@ void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
data->prot_err.agent_addr.bus,
devfn);
- struct cxl_memdev *cxlmd;
- int port_type;

if (!pdev)
return;

- port_type = pci_pcie_type(pdev);
- if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
- port_type == PCI_EXP_TYPE_DOWNSTREAM ||
- port_type == PCI_EXP_TYPE_UPSTREAM) {
- if (data->severity == AER_CORRECTABLE)
- cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
- else
- cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);
-
- return;
- }
-
guard(device)(&pdev->dev);
if (!pdev->dev.driver)
return;

- struct device *mem_dev __free(put_device) = bus_find_device(
- &cxl_bus_type, NULL, pdev, match_memdev_by_parent);
- if (!mem_dev)
- return;
-
- cxlmd = to_cxl_memdev(mem_dev);
if (data->severity == AER_CORRECTABLE)
- cxl_cper_trace_corr_prot_err(pdev, cxlmd, data->ras_cap);
+ cxl_cper_trace_corr_prot_err(pdev, pci_get_dsn(pdev),
+ &data->ras_cap);
else
- cxl_cper_trace_uncorr_prot_err(pdev, cxlmd, data->ras_cap);
+ cxl_cper_trace_uncorr_prot_err(pdev, pci_get_dsn(pdev),
+ &data->ras_cap);
}
EXPORT_SYMBOL_GPL(cxl_cper_handle_prot_err);

--
2.34.1