Re: [PATCH v5 3/7] cxl: Add memory offlining and cache flush helpers
From: Dave Jiang
Date: Mon Mar 09 2026 - 19:01:25 EST
On 3/6/26 2:23 AM, smadhavan@xxxxxxxxxx wrote:
> From: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
>
> Add infrastructure for quiescing the CXL data path before reset:
>
> - Memory offlining: check if CXL-backed memory is online and offline
> it via offline_and_remove_memory() before reset, per CXL
> spec requirement to quiesce all CXL.mem transactions before issuing
> CXL Reset.
> - CPU cache flush: invalidate cache lines before reset
> as a safety measure after memory offline.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@xxxxxxxxxx>
> ---
> drivers/cxl/core/pci.c | 110 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 110 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index f96ce884a213..9e6f0c4b3cb6 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -4,6 +4,8 @@
> #include <linux/io-64-nonatomic-lo-hi.h>
> #include <linux/device.h>
> #include <linux/delay.h>
> +#include <linux/memory_hotplug.h>
> +#include <linux/memregion.h>
> #include <linux/pci.h>
> #include <linux/pci-doe.h>
> #include <linux/aer.h>
> @@ -869,3 +871,111 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
>
> return ctx.count;
> }
> +
> +/*
> + * CXL Reset support - core-provided reset logic for CXL devices.
> + *
> + * These functions implement the CXL reset sequence.
> + */
> +
> +/*
> + * If CXL memory backed by this decoder is online as System RAM, offline
> + * and remove it per CXL spec requirements before issuing CXL Reset.
> + * Returns 0 if memory was not online or was successfully offlined.
> + */
> +static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct cxl_region_params *p;
> + int rc;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr)
> + return 0;
> +
> + p = &cxlr->params;
> + if (!p->res)
> + return 0;
> +
> + if (walk_iomem_res_desc(IORES_DESC_NONE,
> + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> + p->res->start, p->res->end, NULL, NULL) <= 0)
This function runs per endpoint. So if a region is backed by multiple endpoints, wouldn't this memory offline operation be performed on the same region for every related endpoint instead of just once? Maybe use a temporary xarray during the reset process to keep track of the regions that are being hit by the reset?
> + return 0;
> +
> + dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res);
> +
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> + rc = offline_and_remove_memory(p->res->start, resource_size(p->res));
> + if (rc) {
> + dev_err(dev,
> + "Failed to offline CXL memory [%pr]: %d\n",
> + p->res, rc);
> + return rc;
> + }
> +#else
> + dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
> + rc = -EOPNOTSUPP;
> + return rc;
> +#endif
Same comment as Alex: #ifdefs in C files are not preferred. Maybe a helper function can be used and stubbed out when !CONFIG_MEMORY_HOTREMOVE.
> +
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> + struct device *dev;
> +
> + if (!cxlmd || !cxlmd->cxlds)
> + return -ENODEV;
> +
> + dev = cxlmd->cxlds->dev;
> + endpoint = cxlmd->endpoint;
> + if (!endpoint)
> + return 0;
> +
> + return device_for_each_child(&endpoint->dev, NULL,
> + cxl_offline_memory);
> +}
> +
> +static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct resource *res;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr || !cxlr->params.res)
> + return 0;
> +
> + res = cxlr->params.res;
> + cpu_cache_invalidate_memregion(res->start, resource_size(res));
Same comment as for the memory offlining: the cache is being invalidated per region for every decoder, which is probably not something you want to do.
DJ
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> +
> + if (!cxlmd)
> + return 0;
> +
> + endpoint = cxlmd->endpoint;
> + if (!endpoint || IS_ERR(endpoint))
> + return 0;
> +
> + if (!cpu_cache_has_invalidate_memregion())
> + return 0;
> +
> + device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
> + return 0;
> +}
> --
> 2.43.0
>