Re: [RESEND v13 06/25] cxl: Move CXL driver's RCH error handling into core/ras_rch.c

From: Jonathan Cameron
Date: Tue Nov 04 2025 - 13:04:53 EST


On Tue, 4 Nov 2025 11:02:46 -0600
Terry Bowman <terry.bowman@xxxxxxx> wrote:

> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
> from the CXL Virtual Hierarchy (VH) handling. This is because of the
> differences in the RCH and VH topologies. Improve the maintainability and
> add ability to enable/disable RCH handling.
>
> Move and combine the RCH handling code into a single block conditionally
> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>
> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
>
> ---
>
Fairly sure this patch had changes seeing as code now in a different file.

A few minor comments inline. With those tidied up
Reviewed-by: Jonathan Cameron <jonathan.cameron@xxxxxxxxxx>


J

> Changes in v12->v13:
> - None
>
> Changes v11->v12:
> - Moved CXL_RCH_RAS Kconfig definition here from following commit.
>
> Changes v10->v11:
> - New patch
> ---
> drivers/cxl/Kconfig | 7 +++
> drivers/cxl/core/Makefile | 1 +
> drivers/cxl/core/core.h | 5 +-
> drivers/cxl/core/pci.c | 115 -----------------------------------
> drivers/cxl/core/ras_rch.c | 120 +++++++++++++++++++++++++++++++++++++
> tools/testing/cxl/Kbuild | 1 +
> 6 files changed, 132 insertions(+), 117 deletions(-)
> create mode 100644 drivers/cxl/core/ras_rch.c
>
> diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
> index 217888992c88..ffe6ad981434 100644
> --- a/drivers/cxl/Kconfig
> +++ b/drivers/cxl/Kconfig
> @@ -237,4 +237,11 @@ config CXL_RAS
> def_bool y
> depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI
>
> +config CXL_RCH_RAS
> + bool "CXL: Restricted CXL Host (RCH) protocol error handling"
> + def_bool n

Don't need to specify a default of no as that is always the default if
not overridden.

> + depends on CXL_RAS
> + help
> + RAS support for Restricted CXL Host (RCH) defined in CXL1.1.
> +
> endif
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index b2930cc54f8b..fa1d4aed28b9 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -20,3 +20,4 @@ cxl_core-$(CONFIG_CXL_MCE) += mce.o
> cxl_core-$(CONFIG_CXL_FEATURES) += features.o
> cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> cxl_core-$(CONFIG_CXL_RAS) += ras.o
> +cxl_core-$(CONFIG_CXL_RCH_RAS) += ras_rch.o
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index bc818de87ccc..c30ab7c25a92 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -4,6 +4,7 @@
> #ifndef __CXL_CORE_H__
> #define __CXL_CORE_H__
>
> +#include <linux/pci.h>
Why this include. I'm not spotting anything pci specific being added to this header.
If it should already have been here, belongs in a separate patch.

> #include <cxl/mailbox.h>
> #include <linux/rwsem.h>
>
> @@ -167,7 +168,7 @@ static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem
> #endif /* CONFIG_CXL_RAS */
>
> /* Restricted CXL Host specific RAS functions */
> -#ifdef CONFIG_CXL_RAS
> +#ifdef CONFIG_CXL_RCH_RAS
> void cxl_dport_map_rch_aer(struct cxl_dport *dport);
> void cxl_disable_rch_root_ints(struct cxl_dport *dport);
> void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
> @@ -175,7 +176,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
> static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
> static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
> static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
> -#endif /* CONFIG_CXL_RAS */
> +#endif /* CONFIG_CXL_RCH_RAS */
>
> int cxl_gpf_port_setup(struct cxl_dport *dport);

> diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c
> new file mode 100644
> index 000000000000..f6de5492a8b7
> --- /dev/null
> +++ b/drivers/cxl/core/ras_rch.c
> @@ -0,0 +1,120 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
> +
> +#include <linux/pci.h>
> +#include <linux/aer.h>
> +#include <cxl/event.h>
> +#include <cxlmem.h>
> +#include "trace.h"
We should be trying to follow include what you use principles.
So linux/types.h for resource_size_t
Probably a forwards def for struct device as well.


> +
> +void cxl_dport_map_rch_aer(struct cxl_dport *dport)
> +{
> + resource_size_t aer_phys;
> + struct device *host;
> + u16 aer_cap;
> +
> + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
> + if (aer_cap) {
> + host = dport->reg_map.host;
> + aer_phys = aer_cap + dport->rcrb.base;
> + dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
> + sizeof(struct aer_capability_regs));
Maybe keep original alignment or even something like
dport->regs.dport_aer =
devm_cxl_iomap_block(host, aer_phys,
sizeof(struct aer_capability_regs));


> + }
> +}
> +