[RFC PATCH v4 24/27] cxl/core: Add cxl_sysram region type

From: Gregory Price

Date: Sun Feb 22 2026 - 03:57:06 EST


Add the CXL sysram region for direct memory hotplug of CXL RAM regions.

This region eliminates the intermediate dax_region/dax device layer by
directly performing memory hotplug operations.

Key features:
- Supports memory tier integration for proper NUMA placement
- Uses the CXL_SYSRAM_ONLINE_TYPE_* Kconfig options for the default online type
- Automatically hotplugs memory on probe if online type is configured
- Will be extended to support private memory nodes in the future

The driver registers a sysram_regionN device as a child of the CXL
region, managing the memory hotplug lifecycle through device add/remove.

Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
drivers/cxl/core/Makefile | 1 +
drivers/cxl/core/core.h | 4 +
drivers/cxl/core/port.c | 2 +
drivers/cxl/core/region_sysram.c | 351 +++++++++++++++++++++++++++++++
drivers/cxl/cxl.h | 48 +++++
5 files changed, 406 insertions(+)
create mode 100644 drivers/cxl/core/region_sysram.c

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index d3ec8aea64c5..d7ce52c50810 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -18,6 +18,7 @@ cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
cxl_core-$(CONFIG_CXL_REGION) += region_dax.o
cxl_core-$(CONFIG_CXL_REGION) += region_pmem.o
+cxl_core-$(CONFIG_CXL_REGION) += region_sysram.o
cxl_core-$(CONFIG_CXL_MCE) += mce.o
cxl_core-$(CONFIG_CXL_FEATURES) += features.o
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 6e1f695fd155..973bbcae43f7 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -35,6 +35,7 @@ extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_dax_region_type;
+extern const struct device_type cxl_sysram_type;
extern const struct device_type cxl_region_type;

int cxl_decoder_detach(struct cxl_region *cxlr,
@@ -46,6 +47,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr,
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
+#define CXL_SYSRAM_TYPE(x) (&cxl_sysram_type)
int cxl_region_init(void);
void cxl_region_exit(void);
int cxl_get_poison_by_endpoint(struct cxl_port *port);
@@ -54,6 +56,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
u64 dpa);
int devm_cxl_add_dax_region(struct cxl_region *cxlr, enum dax_driver_type);
int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type);

#else
static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
@@ -88,6 +91,7 @@ static inline void cxl_region_exit(void)
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
#define CXL_DAX_REGION_TYPE(x) NULL
+#define CXL_SYSRAM_TYPE(x) NULL
#endif

struct cxl_send_command;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 5c82e6f32572..d6e82b3c2b64 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -66,6 +66,8 @@ static int cxl_device_id(const struct device *dev)
return CXL_DEVICE_PMEM_REGION;
if (dev->type == CXL_DAX_REGION_TYPE())
return CXL_DEVICE_DAX_REGION;
+ if (dev->type == CXL_SYSRAM_TYPE())
+ return CXL_DEVICE_SYSRAM;
if (is_cxl_port(dev)) {
if (is_cxl_root(to_cxl_port(dev)))
return CXL_DEVICE_ROOT;
diff --git a/drivers/cxl/core/region_sysram.c b/drivers/cxl/core/region_sysram.c
new file mode 100644
index 000000000000..47a415deb352
--- /dev/null
+++ b/drivers/cxl/core/region_sysram.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Meta Platforms, Inc. All rights reserved. */
+/*
+ * CXL Sysram Region - Direct memory hotplug for CXL RAM regions
+ *
+ * This interface directly performs memory hotplug for CXL RAM regions,
+ * eliminating the indirection through DAX.
+ */
+
+#include <linux/memory_hotplug.h>
+#include <linux/memory-tiers.h>
+#include <linux/memory.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/* Resource name passed to __add_memory_driver_managed() for hotplugged ranges */
+static const char sysram_res_name[] = "System RAM (CXL)";
+
+/**
+ * cxl_region_find_sysram - Find the sysram device associated with a region
+ * @cxlr: The CXL region
+ *
+ * Looks up the child device of @cxlr named "sysram_region<id>". On success
+ * the returned object carries a device reference that the caller must
+ * release with put_device() on its embedded device when done.
+ *
+ * Return: Pointer to cxl_sysram, or NULL if no such child exists or the
+ * child is not a cxl_sysram device
+ */
+struct cxl_sysram *cxl_region_find_sysram(struct cxl_region *cxlr)
+{
+	struct cxl_sysram *sysram;
+	struct device *sdev;
+	char sname[32];
+
+	snprintf(sname, sizeof(sname), "sysram_region%d", cxlr->id);
+	sdev = device_find_child_by_name(&cxlr->dev, sname);
+	if (!sdev)
+		return NULL;
+
+	sysram = to_cxl_sysram(sdev);
+	/*
+	 * to_cxl_sysram() returns NULL on a type mismatch. The caller then has
+	 * nothing to put, so drop the lookup reference here instead of leaking.
+	 */
+	if (!sysram)
+		put_device(sdev);
+
+	return sysram;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_region_find_sysram, "CXL");
+
+/*
+ * Resolve the NUMA node for the start address of the region's resource.
+ * The target node is preferred; memory_add_physaddr_to_nid() is consulted
+ * only when phys_to_target_node() reports NUMA_NO_NODE.
+ */
+static int sysram_get_numa_node(struct cxl_region *cxlr)
+{
+	u64 start = cxlr->params.res->start;
+	int node = phys_to_target_node(start);
+
+	if (node != NUMA_NO_NODE)
+		return node;
+
+	return memory_add_physaddr_to_nid(start);
+}
+
+/*
+ * Claim the sysram HPA range as a memory resource and add it as
+ * driver-managed memory with the requested online type.
+ *
+ * Returns 0 on success, -EBUSY if a resource is already held or the range
+ * cannot be requested, or the error from __add_memory_driver_managed().
+ */
+static int sysram_hotplug_add(struct cxl_sysram *sysram, enum mmop online_type)
+{
+	u64 start = sysram->hpa_range.start;
+	u64 len = range_len(&sysram->hpa_range);
+	struct resource *res;
+	int rc;
+
+	/* A held resource means the range is already hotplugged */
+	if (sysram->res)
+		return -EBUSY;
+
+	res = request_mem_region(start, len, sysram->res_name);
+	if (!res)
+		return -EBUSY;
+
+	/*
+	 * Set flags appropriate for System RAM. Leave ..._BUSY clear
+	 * so that add_memory() can add a child resource.
+	 */
+	res->flags = IORESOURCE_SYSTEM_RAM;
+	sysram->res = res;
+
+	/*
+	 * Ensure that future kexec'd kernels will not treat
+	 * this as RAM automatically.
+	 */
+	rc = __add_memory_driver_managed(sysram->mgid, start, len,
+					 sysram_res_name, MHP_NID_IS_MGID,
+					 online_type);
+	if (!rc)
+		return 0;
+
+	/* Undo the resource claim so a later attempt starts clean */
+	sysram->res = NULL;
+	remove_resource(res);
+	kfree(res);
+
+	return rc;
+}
+
+/*
+ * Offline and remove the hotplugged range, then release the resource that
+ * sysram_hotplug_add() claimed.
+ *
+ * Returns 0 when nothing is hotplugged or when removal succeeded; otherwise
+ * the error from offline_and_remove_memory(), in which case the resource is
+ * left in place.
+ */
+static int sysram_hotplug_remove(struct cxl_sysram *sysram)
+{
+	struct resource *res;
+	int rc;
+
+	if (!sysram->res)
+		return 0;
+
+	rc = offline_and_remove_memory(sysram->hpa_range.start,
+				       range_len(&sysram->hpa_range));
+	if (rc)
+		return rc;
+
+	res = sysram->res;
+	if (res) {
+		remove_resource(res);
+		kfree(res);
+		sysram->res = NULL;
+	}
+
+	return 0;
+}
+
+/**
+ * cxl_sysram_offline_and_remove - Offline and remove a sysram's hotplugged memory
+ * @sysram: The sysram device whose memory should be removed
+ *
+ * Exported wrapper around sysram_hotplug_remove().
+ *
+ * Return: 0 on success or when no memory is currently hotplugged, otherwise
+ * the error returned while offlining and removing the memory
+ */
+int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram)
+{
+ return sysram_hotplug_remove(sysram);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_sysram_offline_and_remove, "CXL");
+
+/*
+ * Device release callback: undo any remaining hotplug state and free the
+ * sysram object once the last device reference has been dropped.
+ */
+static void cxl_sysram_release(struct device *dev)
+{
+	struct cxl_sysram *sysram = to_cxl_sysram(dev);
+	int rc;
+
+	/* Returns 0 without doing anything when no memory is hotplugged */
+	rc = sysram_hotplug_remove(sysram);
+	if (rc) {
+		/*
+		 * The memory could not be removed, so the resource claimed at
+		 * hotplug time is still registered and still points at
+		 * res_name. Deliberately leak the name, the memory group and
+		 * the node memory type rather than free state that is in use.
+		 */
+		dev_warn(dev, "memory removal failed (%d), leaking hotplug resources\n",
+			 rc);
+		kfree(sysram);
+		return;
+	}
+
+	kfree(sysram->res_name);
+
+	if (sysram->mgid >= 0)
+		memory_group_unregister(sysram->mgid);
+
+	if (sysram->mtype)
+		clear_node_memory_type(sysram->numa_node, sysram->mtype);
+
+	kfree(sysram);
+}
+
+/*
+ * sysfs "hotplug" store handler: parse an online type from @buf and either
+ * remove the memory (offline) or hotplug it with the requested online type.
+ *
+ * Returns @len on success, or a negative errno from parsing or from the
+ * hotplug operation.
+ *
+ * NOTE(review): nothing visible here serializes concurrent writers against
+ * each other or against the other callers of the hotplug helpers - confirm
+ * the locking expectations.
+ */
+static ssize_t hotplug_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t len)
+{
+	struct cxl_sysram *sysram = to_cxl_sysram(dev);
+	int online_type, rc;
+
+	online_type = mhp_online_type_from_str(buf);
+	if (online_type < 0)
+		return online_type;
+
+	if (online_type == MMOP_OFFLINE)
+		rc = sysram_hotplug_remove(sysram);
+	else
+		rc = sysram_hotplug_add(sysram, online_type);
+
+	if (rc) {
+		dev_warn(dev, "hotplug %s failed: %d\n",
+			 online_type == MMOP_OFFLINE ? "offline" : "online", rc);
+		return rc;
+	}
+
+	/* Record the command on success, as devm_cxl_add_sysram() does */
+	sysram->last_hotplug_cmd = online_type;
+
+	return len;
+}
+static DEVICE_ATTR_WO(hotplug);
+
+/* sysfs attributes specific to sysram devices */
+static struct attribute *cxl_sysram_attrs[] = {
+ &dev_attr_hotplug.attr,
+ NULL
+};
+
+static const struct attribute_group cxl_sysram_attribute_group = {
+ .attrs = cxl_sysram_attrs,
+};
+
+/* All groups exposed by a sysram device: the CXL base group plus its own */
+static const struct attribute_group *cxl_sysram_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ &cxl_sysram_attribute_group,
+ NULL
+};
+
+/* Device type for sysram devices; supplies the release callback and groups */
+const struct device_type cxl_sysram_type = {
+ .name = "cxl_sysram",
+ .release = cxl_sysram_release,
+ .groups = cxl_sysram_attribute_groups,
+};
+
+/* True when @dev carries the cxl_sysram device type */
+static bool is_cxl_sysram(struct device *dev)
+{
+ return dev->type == &cxl_sysram_type;
+}
+
+/**
+ * to_cxl_sysram - Convert a device to its containing cxl_sysram
+ * @dev: Device expected to be of the cxl_sysram type
+ *
+ * Warns once if @dev is not a cxl_sysram device.
+ *
+ * Return: The containing cxl_sysram, or NULL on a type mismatch
+ */
+struct cxl_sysram *to_cxl_sysram(struct device *dev)
+{
+	if (!dev_WARN_ONCE(dev, !is_cxl_sysram(dev),
+			   "not a cxl_sysram device\n"))
+		return container_of(dev, struct cxl_sysram, dev);
+
+	return NULL;
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_sysram, "CXL");
+
+/**
+ * cxl_sysram_dev - Get the device embedded in a cxl_sysram
+ * @sysram: The sysram object
+ *
+ * Return: Pointer to the struct device embedded in @sysram
+ */
+struct device *cxl_sysram_dev(struct cxl_sysram *sysram)
+{
+ return &sysram->dev;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_sysram_dev, "CXL");
+
+/* lockdep class assigned to the device mutex of sysram devices */
+static struct lock_class_key cxl_sysram_key;
+
+/*
+ * Pick the build-time default online type from the
+ * CONFIG_CXL_SYSRAM_ONLINE_TYPE_* options. MMOP_OFFLINE is returned when none
+ * of the options tested here is enabled.
+ */
+static enum mmop cxl_sysram_get_default_online_type(void)
+{
+	enum mmop type = MMOP_OFFLINE;
+
+	if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_SYSTEM_DEFAULT))
+		type = mhp_get_default_online_type();
+	else if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_MOVABLE))
+		type = MMOP_ONLINE_MOVABLE;
+	else if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_NORMAL))
+		type = MMOP_ONLINE;
+
+	return type;
+}
+
+/*
+ * Allocate a sysram object for @cxlr and initialize its embedded device
+ * (parent, bus, type, lockdep class). The device is initialized but not
+ * added.
+ *
+ * Returns the new object, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct cxl_sysram *cxl_sysram_alloc(struct cxl_region *cxlr)
+{
+	struct cxl_sysram *sysram __free(kfree) =
+		kzalloc(sizeof(*sysram), GFP_KERNEL);
+
+	if (!sysram)
+		return ERR_PTR(-ENOMEM);
+
+	sysram->cxlr = cxlr;
+	sysram->online_type = cxl_sysram_get_default_online_type();
+	sysram->last_hotplug_cmd = MMOP_OFFLINE;
+	/* -1 marks "not assigned yet" for both ids */
+	sysram->numa_node = -1;
+	sysram->mgid = -1;
+
+	device_initialize(&sysram->dev);
+	lockdep_set_class(&sysram->dev.mutex, &cxl_sysram_key);
+	device_set_pm_not_required(&sysram->dev);
+	sysram->dev.parent = &cxlr->dev;
+	sysram->dev.bus = &cxl_bus_type;
+	sysram->dev.type = &cxl_sysram_type;
+
+	return_ptr(sysram);
+}
+
+/* devm action: unregister the sysram device passed as the opaque argument */
+static void sysram_unregister(void *_sysram)
+{
+ struct cxl_sysram *sysram = _sysram;
+
+ device_unregister(&sysram->dev);
+}
+
+/**
+ * devm_cxl_add_sysram - Create and register the sysram child of a region
+ * @cxlr: Parent CXL region
+ * @online_type: Online type override; a negative value keeps the
+ *               Kconfig-selected default
+ *
+ * Allocates the sysram device for the aligned HPA range of @cxlr, sets up its
+ * memory type and static memory group, and adds the device. For
+ * CXL_PARTMODE_RAM regions whose online type is above MMOP_OFFLINE the range
+ * is hotplugged immediately; a hotplug failure there is only warned about.
+ * Unregistration is tied to @cxlr's device through a devm action.
+ *
+ * Return: 0 on success, negative errno otherwise
+ */
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type)
+{
+	struct cxl_sysram *sysram __free(put_cxl_sysram) = NULL;
+	struct memory_dev_type *mtype;
+	struct range hpa_range;
+	struct device *dev;
+	int adist = MEMTIER_DEFAULT_LOWTIER_ADISTANCE;
+	int numa_node;
+	int rc;
+
+	rc = cxl_region_get_hpa_range(cxlr, &hpa_range);
+	if (rc)
+		return rc;
+
+	hpa_range = memory_block_align_range(&hpa_range);
+	if (hpa_range.start >= hpa_range.end) {
+		dev_warn(&cxlr->dev, "region too small after alignment\n");
+		return -ENOSPC;
+	}
+
+	sysram = cxl_sysram_alloc(cxlr);
+	if (IS_ERR(sysram))
+		return PTR_ERR(sysram);
+
+	sysram->hpa_range = hpa_range;
+
+	sysram->res_name = kasprintf(GFP_KERNEL, "cxl_sysram%d", cxlr->id);
+	if (!sysram->res_name)
+		return -ENOMEM;
+
+	/*
+	 * Override default online type if caller specified one. The compiler
+	 * may give enum mmop an unsigned underlying type, which would make a
+	 * plain ">= 0" always true; compare as int so that a negative
+	 * "unspecified" value really keeps the default.
+	 */
+	if ((int)online_type >= 0)
+		sysram->online_type = online_type;
+
+	dev = &sysram->dev;
+
+	rc = dev_set_name(dev, "sysram_region%d", cxlr->id);
+	if (rc)
+		return rc;
+
+	/* Setup memory tier before adding device */
+	numa_node = sysram_get_numa_node(cxlr);
+	if (numa_node < 0) {
+		dev_warn(&cxlr->dev, "rejecting region with invalid node: %d\n",
+			 numa_node);
+		return -EINVAL;
+	}
+	sysram->numa_node = numa_node;
+
+	mt_calc_adistance(numa_node, &adist);
+	mtype = mt_get_memory_type(adist);
+	if (IS_ERR(mtype))
+		return PTR_ERR(mtype);
+	sysram->mtype = mtype;
+
+	init_node_memory_type(numa_node, mtype);
+
+	/* Register memory group for this region */
+	rc = memory_group_register_static(numa_node,
+					  PFN_UP(range_len(&hpa_range)));
+	if (rc < 0)
+		return rc;
+	sysram->mgid = rc;
+
+	/*
+	 * Error returns up to and including a failed device_add() drop the
+	 * last reference via __free(put_cxl_sysram); cxl_sysram_release()
+	 * then undoes whatever was set up above.
+	 */
+	rc = device_add(dev);
+	if (rc)
+		return rc;
+
+	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+		dev_name(dev));
+
+	/*
+	 * Dynamic capacity regions (DCD) will have memory added later.
+	 * For static RAM regions, hotplug the entire range now.
+	 */
+	if (cxlr->mode != CXL_PARTMODE_RAM)
+		goto out;
+
+	/* If default online_type is a valid online mode, immediately hotplug */
+	if (sysram->online_type > MMOP_OFFLINE) {
+		rc = sysram_hotplug_add(sysram, sysram->online_type);
+		if (rc)
+			dev_warn(dev, "hotplug failed: %d\n", rc);
+		else
+			sysram->last_hotplug_cmd = sysram->online_type;
+	}
+
+out:
+	/* Ownership moves to the devm action; it unregisters the device */
+	return devm_add_action_or_reset(&cxlr->dev, sysram_unregister,
+					no_free_ptr(sysram));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_sysram, "CXL");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f899f240f229..8e8342fd4fde 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -607,6 +607,34 @@ struct cxl_dax_region {
enum dax_driver_type dax_driver;
};

+/**
+ * struct cxl_sysram - CXL SysRAM region for system memory hotplug
+ * @dev: device for this sysram
+ * @cxlr: parent cxl_region
+ * @online_type: Default memory online type for new hotplug ops (MMOP_* value)
+ * @last_hotplug_cmd: Last hotplug command submitted (MMOP_* value)
+ * @hpa_range: Host physical address range for the region
+ * @res_name: Resource name for the memory region
+ * @res: Memory resource (set when hotplugged, NULL otherwise)
+ * @mgid: Memory group id (-1 until a memory group has been registered)
+ * @mtype: Memory tier type (NULL until one has been assigned)
+ * @numa_node: NUMA node for this memory (-1 until resolved)
+ *
+ * Device that directly performs memory hotplug for CXL RAM regions.
+ */
+struct cxl_sysram {
+ struct device dev;
+ struct cxl_region *cxlr;
+ enum mmop online_type;
+ int last_hotplug_cmd;
+ struct range hpa_range;
+ const char *res_name;
+ struct resource *res;
+ int mgid;
+ struct memory_dev_type *mtype;
+ int numa_node;
+};
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -807,6 +835,7 @@ DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device
DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev))
DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
DEFINE_FREE(put_cxl_dax_region, struct cxl_dax_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+DEFINE_FREE(put_cxl_sysram, struct cxl_sysram *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))

int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
void cxl_bus_rescan(void);
@@ -889,6 +918,7 @@ void cxl_destroy_region(struct cxl_region *cxlr);
struct device *cxl_region_dev(struct cxl_region *cxlr);
enum cxl_partition_mode cxl_region_mode(struct cxl_region *cxlr);
int cxl_get_region_range(struct cxl_region *cxlr, struct range *range);
+struct cxl_sysram *cxl_region_find_sysram(struct cxl_region *cxlr);
int cxl_get_committed_regions(struct cxl_memdev *cxlmd,
struct cxl_region **regions, int max_regions);
struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
@@ -936,6 +966,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_PMEM_REGION 7
#define CXL_DEVICE_DAX_REGION 8
#define CXL_DEVICE_PMU 9
+#define CXL_DEVICE_SYSRAM 10

#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -954,6 +985,10 @@ bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
+struct cxl_sysram *to_cxl_sysram(struct device *dev);
+struct device *cxl_sysram_dev(struct cxl_sysram *sysram);
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type);
+int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram);
u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
@@ -972,6 +1007,19 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
{
return NULL;
}
+/* Stub used when CXL region support is not built: there is no sysram */
+static inline struct cxl_sysram *to_cxl_sysram(struct device *dev)
+{
+	return NULL;
+}
+/* Stub for cxl_sysram_dev(), which is otherwise declared without a fallback */
+static inline struct device *cxl_sysram_dev(struct cxl_sysram *sysram)
+{
+	return NULL;
+}
+static inline int devm_cxl_add_sysram(struct cxl_region *cxlr,
+ enum mmop online_type)
+{
+ return -ENXIO;
+}
+static inline int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram)
+{
+ return -ENXIO;
+}
static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
u64 spa)
{
--
2.53.0