Re: [PATCH 2/6] cxl: add sysram_region memory controller

From: David Hildenbrand (Red Hat)

Date: Mon Jan 12 2026 - 15:01:00 EST


On 1/12/26 17:35, Gregory Price wrote:
Add a sysram memctrl that directly hotplugs memory without needing to
route through DAX. This simplifies the sysram usecase considerably.

The sysram memctl adds new sysfs controls when registered:
region/memctrl/[hotplug, hotunplug, state]

hotplug: controller attempts to hotplug the memory region

Why disconnect the hotplug from the online state?

echo online_movable > hotplug ?

Then we can just have something like add_and_online_memory() in the core.

hotunplug: controller attempts to offline and hotunplug the memory region
state: [online,online_normal,offline]
online : controller onlines blocks in ZONE_MOVABLE

I don't like this incosistency regarding the remainder of common hotplug toggles.

We should use exactly the same values with exactly the same semantics. Yes, user-space tooling should be thaught to pass in online_movable :)

online_normal: controller onlines blocks in ZONE_NORMAL
offline : controller attempts to offline the memory blocks

Why is that required? ideally we'd start with hotplug vs. hotunplug and leave manual onlining/offlining out of this interface for now.


Hotplug note - by default the controller will hotplug the blocks, but
leave them offline (unless MHP auto-online in Kconfig is enabled).

Setting state to "online_normal" may prevent future hot-unplug of sysram
regions, and unbinding a memory region with memory online in ZONE_NORMAL
may result in the device being removed but the memory remaining online.

This can result in future management functions failing (such as adding a
new region). This is why "online_normal" is explicit, and the default
online zone is ZONE_MOVABLE.

Cc: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
drivers/cxl/core/core.h | 2 +
drivers/cxl/core/memctrl/Makefile | 1 +
drivers/cxl/core/memctrl/memctrl.c | 2 +
drivers/cxl/core/memctrl/sysram_region.c | 358 +++++++++++++++++++++++
drivers/cxl/core/region.c | 5 +
drivers/cxl/cxl.h | 6 +-
6 files changed, 372 insertions(+), 2 deletions(-)
create mode 100644 drivers/cxl/core/memctrl/sysram_region.c

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 1156a4bd0080..18cb84950500 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -31,6 +31,8 @@ int cxl_decoder_detach(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled, int pos,
enum cxl_detach_mode mode);
+int devm_cxl_add_sysram_region(struct cxl_region *cxlr);
+
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
diff --git a/drivers/cxl/core/memctrl/Makefile b/drivers/cxl/core/memctrl/Makefile
index 8165aad5a52a..1c52c7d75570 100644
--- a/drivers/cxl/core/memctrl/Makefile
+++ b/drivers/cxl/core/memctrl/Makefile
@@ -2,3 +2,4 @@
cxl_core-$(CONFIG_CXL_REGION) += memctrl/memctrl.o
cxl_core-$(CONFIG_CXL_REGION) += memctrl/dax_region.o
+cxl_core-$(CONFIG_CXL_REGION) += memctrl/sysram_region.o
diff --git a/drivers/cxl/core/memctrl/memctrl.c b/drivers/cxl/core/memctrl/memctrl.c
index 24e0e14b39c7..40ffb59353bb 100644
--- a/drivers/cxl/core/memctrl/memctrl.c
+++ b/drivers/cxl/core/memctrl/memctrl.c
@@ -34,6 +34,8 @@ int cxl_enable_memctrl(struct cxl_region *cxlr)
return devm_cxl_add_dax_region(cxlr);
case CXL_MEMCTRL_DAX:
return devm_cxl_add_dax_region(cxlr);
+ case CXL_MEMCTRL_SYSRAM:
+ return devm_cxl_add_sysram_region(cxlr);
default:
return -EINVAL;
}
diff --git a/drivers/cxl/core/memctrl/sysram_region.c b/drivers/cxl/core/memctrl/sysram_region.c
new file mode 100644
index 000000000000..a7570c8a54e1
--- /dev/null
+++ b/drivers/cxl/core/memctrl/sysram_region.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Meta Inc. All rights reserved. */
+#include <linux/memremap.h>
+#include <linux/memory.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
+#include <linux/string_helpers.h>
+#include <linux/sched/signal.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "../core.h"
+
+/* If HMAT was unavailable, assign a default distance. */
+#define MEMTIER_DEFAULT_CXL_ADISTANCE (MEMTIER_ADISTANCE_DRAM * 5)
+
+static const char *sysram_name = "System RAM (CXL)";
+
+struct cxl_sysram_data {
+ const char *res_name;
+ int mgid;
+ struct resource *res;
+};
+
+static DEFINE_MUTEX(cxl_memory_type_lock);
+static LIST_HEAD(cxl_memory_types);
+
+static struct cxl_region *to_cxl_region(struct device *dev)
+{
+ if (dev->type != &cxl_region_type)
+ return NULL;
+ return container_of(dev, struct cxl_region, dev);
+}
+
+static struct memory_dev_type *cxl_find_alloc_memory_type(int adist)
+{
+ guard(mutex)(&cxl_memory_type_lock);
+ return mt_find_alloc_memory_type(adist, &cxl_memory_types);
+}
+
+static void __maybe_unused cxl_put_memory_types(void)
+{
+ guard(mutex)(&cxl_memory_type_lock);
+ mt_put_memory_types(&cxl_memory_types);
+}
+
+static int cxl_sysram_range(struct cxl_region *cxlr, struct range *r)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (!p->res)
+ return -ENODEV;
+
+ /* memory-block align the hotplug range */
+ r->start = ALIGN(p->res->start, memory_block_size_bytes());
+ r->end = ALIGN_DOWN(p->res->end + 1, memory_block_size_bytes()) - 1;
+ if (r->start >= r->end) {
+ r->start = p->res->start;
+ r->end = p->res->end;
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+static ssize_t hotunplug_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct range range;
+ int rc;
+
+ if (!cxlr)
+ return -ENODEV;
+
+ rc = cxl_sysram_range(cxlr, &range);
+ if (rc)
+ return rc;
+
+ rc = offline_and_remove_memory(range.start, range_len(&range));
+
+ if (rc)
+ return rc;
+
+ return len;
+}
+static DEVICE_ATTR_WO(hotunplug);
+
+struct online_memory_cb_arg {
+ int online_type;
+ int rc;
+};
+
+static int online_memory_block_cb(struct memory_block *mem, void *arg)
+{
+ struct online_memory_cb_arg *cb_arg = arg;
+
+ if (signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ if (mem->state == MEM_ONLINE)
+ return 0;
+
+ mem->online_type = cb_arg->online_type;
+ cb_arg->rc = device_online(&mem->dev);
+
+ return cb_arg->rc;
+}
+
+static int offline_memory_block_cb(struct memory_block *mem, void *arg)
+{
+ int *rc = arg;
+
+ if (signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ if (mem->state == MEM_OFFLINE)
+ return 0;
+
+ *rc = device_offline(&mem->dev);
+
+ return *rc;
+}
+
+static ssize_t state_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct online_memory_cb_arg cb_arg;
+ struct range range;
+ int rc;
+
+ if (!cxlr)
+ return -ENODEV;
+
+ rc = cxl_sysram_range(cxlr, &range);
+ if (rc)
+ return rc;
+
+ rc = lock_device_hotplug_sysfs();
+ if (rc)
+ return rc;
+
+ if (sysfs_streq(buf, "online")) {
+ cb_arg.online_type = MMOP_ONLINE_MOVABLE;
+ cb_arg.rc = 0;
+ rc = walk_memory_blocks(range.start, range_len(&range),
+ &cb_arg, online_memory_block_cb);
+ if (!rc)
+ rc = cb_arg.rc;
+ } else if (sysfs_streq(buf, "online_normal")) {
+ cb_arg.online_type = MMOP_ONLINE;
+ cb_arg.rc = 0;
+ rc = walk_memory_blocks(range.start, range_len(&range),
+ &cb_arg, online_memory_block_cb);
+ if (!rc)
+ rc = cb_arg.rc;
+ } else if (sysfs_streq(buf, "offline")) {
+ int offline_rc = 0;
+
+ rc = walk_memory_blocks(range.start, range_len(&range),
+ &offline_rc, offline_memory_block_cb);
+ if (!rc)
+ rc = offline_rc;

Let's expose this functionality through some common-code helpers. I really don't want more code doing this non-obvious device_offline() etc dance.

walk_memory_blocks() should become a core-mm helper. Maybe we can also cleanup drivers/acpi/acpi_memhotplug.c in that regard.

Hopefully we can then also reuse these helpers in ppc code (see dlpar_add_lmb() and dlpar_remove_lmb() that do something similar, but grab the device hotplug lock themselves as they want to perform some additional operations).

--
Cheers

David