Re: [PATCH 1/2] mm,memory_hotplug: Implement numa node notifier

From: David Hildenbrand
Date: Fri Apr 04 2025 - 06:10:48 EST


On 01.04.25 11:27, Oscar Salvador wrote:
There are at least four consumers of hotplug_memory_notifier that what they
really are interested in is whether any numa node changed its state, e.g: going
from being memory aware to becoming memoryless.

Implement a specific notifier for numa nodes when their state gets changed,
and have those consumers that only care about numa node state changes use it.

Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
drivers/acpi/numa/hmat.c | 6 +--
drivers/base/node.c | 19 +++++++++
drivers/cxl/core/region.c | 14 +++----
drivers/cxl/cxl.h | 4 +-
include/linux/memory.h | 38 ++++++++++++++++++
kernel/cgroup/cpuset.c | 2 +-
mm/memory-tiers.c | 8 ++--
mm/memory_hotplug.c | 84 +++++++++++++++++++++++++++++----------
mm/slub.c | 22 +++++-----
9 files changed, 148 insertions(+), 49 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index bfbb08b1e6af..d18f3efa2149 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -918,10 +918,10 @@ static int hmat_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_target *target;
- struct memory_notify *mnb = arg;
+ struct node_notify *mnb = arg;
int pxm, nid = mnb->status_change_nid;
- if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+ if (nid == NUMA_NO_NODE || action != NODE_BECAME_MEM_AWARE)
return NOTIFY_OK;
pxm = node_to_pxm(nid);
@@ -1074,7 +1074,7 @@ static __init int hmat_init(void)
hmat_register_targets();
/* Keep the table and structures if the notifier may use them */
- if (hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
+ if (hotplug_node_notifier(hmat_callback, HMAT_CALLBACK_PRI))
goto out_put;
if (!hmat_set_default_dram_perf())
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0ea653fa3433..182c71dfb5b8 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -110,6 +110,25 @@ static const struct attribute_group *node_access_node_groups[] = {
NULL,
};
+static BLOCKING_NOTIFIER_HEAD(node_chain);
+
+int register_node_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&node_chain, nb);
+}
+EXPORT_SYMBOL(register_node_notifier);
+
+void unregister_node_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&node_chain, nb);
+}
+EXPORT_SYMBOL(unregister_node_notifier);
+
+int node_notify(unsigned long val, void *v)
+{
+ return blocking_notifier_call_chain(&node_chain, val, v);
+}
+
static void node_remove_accesses(struct node *node)
{
struct node_access_nodes *c, *cnext;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e8d11a988fd9..7d187088f557 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2409,12 +2409,12 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
unsigned long action, void *arg)
{
struct cxl_region *cxlr = container_of(nb, struct cxl_region,
- memory_notifier);
- struct memory_notify *mnb = arg;
+ node_notifier);
+ struct node_notify *mnb = arg;
int nid = mnb->status_change_nid;
int region_nid;
- if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+ if (nid == NUMA_NO_NODE || action != NODE_BECAME_MEM_AWARE)
return NOTIFY_DONE;
/*
@@ -3388,7 +3388,7 @@ static void shutdown_notifiers(void *_cxlr)
{
struct cxl_region *cxlr = _cxlr;
- unregister_memory_notifier(&cxlr->memory_notifier);
+ unregister_node_notifier(&cxlr->node_notifier);
unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
}
@@ -3427,9 +3427,9 @@ static int cxl_region_probe(struct device *dev)
if (rc)
return rc;
- cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
- cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
- register_memory_notifier(&cxlr->memory_notifier);
+ cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
+ cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
+ register_node_notifier(&cxlr->node_notifier);
cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
cxlr->adist_notifier.priority = 100;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index bbbaa0d0a670..d4c9a499de7a 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -532,7 +532,7 @@ struct cxl_region_params {
* @flags: Region state flags
* @params: active + config params for the region
* @coord: QoS access coordinates for the region
- * @memory_notifier: notifier for setting the access coordinates to node
+ * @node_notifier: notifier for setting the access coordinates to node
* @adist_notifier: notifier for calculating the abstract distance of node
*/
struct cxl_region {
@@ -545,7 +545,7 @@ struct cxl_region {
unsigned long flags;
struct cxl_region_params params;
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
- struct notifier_block memory_notifier;
+ struct notifier_block node_notifier;
struct notifier_block adist_notifier;
};
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 12daa6ec7d09..1d814dfbb8a8 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -99,6 +99,14 @@ int set_memory_block_size_order(unsigned int order);
#define MEM_PREPARE_ONLINE (1<<6)
#define MEM_FINISH_OFFLINE (1<<7)
+/* These states are used for numa node notifiers */
+#define NODE_BECOMING_MEM_AWARE (1<<0)
+#define NODE_BECAME_MEM_AWARE (1<<1)
+#define NODE_BECOMING_MEMORYLESS (1<<2)
+#define NODE_BECAME_MEMORYLESS (1<<3)
+#define NODE_CANCEL_MEM_AWARE (1<<4)
+#define NODE_CANCEL_MEMORYLESS (1<<5)

Assuming we can remove the _normal stuff and we can do what we do in patch #2 here already ... meaning we unconditionally store the nid in the MEM notifier ...

What about extending the existing memory notifier instead?

That is, we add

MEM_NODE_BECOMING_MEM_AWARE ... and trigger it using the same notifier chain. We only have to make sure these new events will be properly filtered out (IIRC, for most we do that already).

Of course, the range will not apply to these events, but the nid would apply to all.

Just a thought.

--
Cheers,

David / dhildenb