Re: [PATCH v17 03/11] cxl: Use common CPER handling for all CXL devices
From: Dave Jiang
Date: Tue May 05 2026 - 18:03:06 EST
On 5/5/26 10:30 AM, Terry Bowman wrote:
> Fold the Port and Endpoint specific paths in cxl_cper_handle_prot_err()
> into a single code path. Drop the PCI type dispatch block as both Port
> and Endpoint devices now go through the same code path.
>
> Extend the pdev->dev.driver != NULL gate to Port devices, which previously
> bypassed it. This check, together with the existing device lock, ensures
> the CXL device remains accessible for the duration of the handler.
>
> Recent trace event changes generalize the interface to take a
> struct device * for all CXL devices. Update the Endpoint CPER path
> to pass &pdev->dev (the PCI device) instead of &cxlmd->dev (the
> memdev). This makes the trace event's "device=" field show the PCI
> BDF for all CPER callers, replacing the prior "device=memN" output
> for Endpoints. Userspace consumers correlating CPER trace events to
> memdev names must now map the PCI BDF back to the memdev via
> /sys/bus/cxl/devices/.
>
> Remove the bus_find_device(&cxl_bus_type, ..., match_memdev_by_parent)
> lookup along with the match_memdev_by_parent() helper.
>
> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
Reviewed-by: Dave Jiang <dave.jiang@xxxxxxxxx>
>
> ---
>
> Changes in v16->v17:
> - New commit
> ---
> drivers/cxl/core/ras.c | 81 +++++++-----------------------------------
> 1 file changed, 13 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> index d7081caaf5d3..56611da8357a 100644
> --- a/drivers/cxl/core/ras.c
> +++ b/drivers/cxl/core/ras.c
> @@ -8,65 +8,28 @@
> #include <cxlpci.h>
> #include "trace.h"
>
> -static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
> - struct cxl_ras_capability_regs ras_cap)
> +static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, u64 serial,
> + struct cxl_ras_capability_regs *ras_cap)
> {
> - u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
> + u32 status = ras_cap->cor_status & ~ras_cap->cor_mask;
>
> - trace_cxl_aer_correctable_error(&pdev->dev, status, pci_get_dsn(pdev));
> + trace_cxl_aer_correctable_error(&pdev->dev, status, serial);
> }
>
> -static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
> - struct cxl_ras_capability_regs ras_cap)
> +static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, u64 serial,
> + struct cxl_ras_capability_regs *ras_cap)
> {
> - u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
> + u32 status = ras_cap->uncor_status & ~ras_cap->uncor_mask;
> u32 fe;
>
> if (hweight32(status) > 1)
> fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
> - ras_cap.cap_control));
> + ras_cap->cap_control));
> else
> fe = status;
>
> trace_cxl_aer_uncorrectable_error(&pdev->dev, status, fe,
> - ras_cap.header_log,
> - pci_get_dsn(pdev));
> -}
> -
> -static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
> - struct cxl_memdev *cxlmd,
> - struct cxl_ras_capability_regs ras_cap)
> -{
> - u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
> -
> - trace_cxl_aer_correctable_error(&cxlmd->dev, status,
> - pci_get_dsn(pdev));
> -}
> -
> -static void
> -cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
> - struct cxl_memdev *cxlmd,
> - struct cxl_ras_capability_regs ras_cap)
> -{
> - u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
> - u32 fe;
> -
> - if (hweight32(status) > 1)
> - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
> - ras_cap.cap_control));
> - else
> - fe = status;
> -
> - trace_cxl_aer_uncorrectable_error(&cxlmd->dev, status, fe,
> - ras_cap.header_log,
> - pci_get_dsn(pdev));
> -}
> -
> -static int match_memdev_by_parent(struct device *dev, const void *uport)
> -{
> - if (is_cxl_memdev(dev) && dev->parent == uport)
> - return 1;
> - return 0;
> + ras_cap->header_log, serial);
> }
>
> void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
> @@ -77,38 +40,20 @@ void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
> pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
> data->prot_err.agent_addr.bus,
> devfn);
> - struct cxl_memdev *cxlmd;
> - int port_type;
>
> if (!pdev)
> return;
>
> - port_type = pci_pcie_type(pdev);
> - if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
> - port_type == PCI_EXP_TYPE_DOWNSTREAM ||
> - port_type == PCI_EXP_TYPE_UPSTREAM) {
> - if (data->severity == AER_CORRECTABLE)
> - cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
> - else
> - cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);
> -
> - return;
> - }
> -
> guard(device)(&pdev->dev);
> if (!pdev->dev.driver)
> return;
>
> - struct device *mem_dev __free(put_device) = bus_find_device(
> - &cxl_bus_type, NULL, pdev, match_memdev_by_parent);
> - if (!mem_dev)
> - return;
> -
> - cxlmd = to_cxl_memdev(mem_dev);
> if (data->severity == AER_CORRECTABLE)
> - cxl_cper_trace_corr_prot_err(pdev, cxlmd, data->ras_cap);
> + cxl_cper_trace_corr_prot_err(pdev, pci_get_dsn(pdev),
> + &data->ras_cap);
> else
> - cxl_cper_trace_uncorr_prot_err(pdev, cxlmd, data->ras_cap);
> + cxl_cper_trace_uncorr_prot_err(pdev, pci_get_dsn(pdev),
> + &data->ras_cap);
> }
> EXPORT_SYMBOL_GPL(cxl_cper_handle_prot_err);
>