[RFC PATCH v4 18/27] mm/memory: NP_OPS_NUMA_BALANCING - private node NUMA balancing
From: Gregory Price
Date: Sun Feb 22 2026 - 03:55:54 EST
Not all private nodes want their folios scanned and migrated by NUMA
balancing. Add NP_OPS_NUMA_BALANCING (BIT(5)) as an opt-in flag.
Because balancing implies migration, node_private_set_ops() now
rejects this flag with -EINVAL unless NP_OPS_MIGRATION is also set.
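As a rough sketch, a driver that wants balancing on its node would opt
in as below (my_node_ops, my_driver_enable_numa and the elided
callbacks are hypothetical; node_private_set_ops() and the flags are
from this series):

	/* Hypothetical driver opt-in; balancing requires migration. */
	static const struct node_private_ops my_node_ops = {
		.flags = NP_OPS_MIGRATION | NP_OPS_NUMA_BALANCING,
		/* ... driver callbacks elided ... */
	};

	static int my_driver_enable_numa(int nid)
	{
		/* returns -EINVAL if NP_OPS_MIGRATION were missing */
		return node_private_set_ops(nid, &my_node_ops);
	}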
Introduce a folio_managed_allows_numa() helper (usage sketched below):
 - ZONE_DEVICE folios always return false (never NUMA-scanned)
 - private-node folios return true only if NP_OPS_NUMA_BALANCING is set
 - all other folios return true
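For illustration, a scan-side caller uses the helper as follows
(numa_scan_allows() is a made-up name; folio_can_map_prot_numa() in
the hunk below is the real user):

	/* Sketch of the scan-side filter, mirroring the
	 * folio_can_map_prot_numa() change in this patch.
	 */
	static bool numa_scan_allows(struct folio *folio)
	{
		if (!folio || folio_test_ksm(folio))
			return false;	/* no folio, or KSM-shared */
		/* rejects ZONE_DEVICE and non-opted-in private nodes */
		return folio_managed_allows_numa(folio);
	}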
In do_numa_page(), if a private-node folio with NP_OPS_PROTECT_WRITE
is still on its node after a failed or skipped migration, re-enforce
write protection so the next write triggers the node's handle_fault.
Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
drivers/base/node.c | 4 ++++
include/linux/node_private.h | 16 ++++++++++++++++
mm/memory.c | 11 +++++++++++
mm/mempolicy.c | 5 ++++-
4 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index a4955b9b5b93..88aaac45e814 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -961,6 +961,10 @@ int node_private_set_ops(int nid, const struct node_private_ops *ops)
(ops->flags & NP_OPS_PROTECT_WRITE))
return -EINVAL;
+ if ((ops->flags & NP_OPS_NUMA_BALANCING) &&
+ !(ops->flags & NP_OPS_MIGRATION))
+ return -EINVAL;
+
mutex_lock(&node_private_lock);
np = rcu_dereference_protected(NODE_DATA(nid)->node_private,
lockdep_is_held(&node_private_lock));
diff --git a/include/linux/node_private.h b/include/linux/node_private.h
index 34d862f09e24..5ac60db1f044 100644
--- a/include/linux/node_private.h
+++ b/include/linux/node_private.h
@@ -140,6 +140,8 @@ struct node_private_ops {
#define NP_OPS_PROTECT_WRITE BIT(3)
/* Kernel reclaim (kswapd, direct reclaim, OOM) operates on this node */
#define NP_OPS_RECLAIM BIT(4)
+/* Allow NUMA balancing to scan and migrate folios on this node */
+#define NP_OPS_NUMA_BALANCING BIT(5)
/* Private node is OOM-eligible: reclaim can run and pages can be demoted here */
#define NP_OPS_OOM_ELIGIBLE (NP_OPS_RECLAIM | NP_OPS_DEMOTION)
@@ -263,6 +265,15 @@ static inline void folio_managed_split_cb(struct folio *original_folio,
}
#ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool folio_managed_allows_numa(struct folio *folio)
+{
+ if (folio_is_zone_device(folio))
+ return false; /* device memory is never NUMA-scanned */
+ if (!folio_is_private_managed(folio))
+ return true; /* ordinary nodes: no restriction */
+ return folio_private_flags(folio, NP_OPS_NUMA_BALANCING);
+}
+
static inline int folio_managed_allows_user_migrate(struct folio *folio)
{
if (folio_is_zone_device(folio))
@@ -443,6 +454,11 @@ int node_private_clear_ops(int nid, const struct node_private_ops *ops);
#else /* !CONFIG_NUMA || !CONFIG_MEMORY_HOTPLUG */
+static inline bool folio_managed_allows_numa(struct folio *folio)
+{
+ return !folio_is_zone_device(folio);
+}
+
static inline int folio_managed_allows_user_migrate(struct folio *folio)
{
return -ENOENT;
diff --git a/mm/memory.c b/mm/memory.c
index 0f78988befef..88a581baae40 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,7 @@
#include <linux/sched/sysctl.h>
#include <linux/pgalloc.h>
#include <linux/uaccess.h>
+#include <linux/node_private.h>
#include <trace/events/kmem.h>
@@ -6041,6 +6042,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (!folio || folio_is_zone_device(folio))
goto out_map;
+ /*
+ * No private-node check is needed here: non-opted-in nodes are filtered
+ * by folio_can_map_prot_numa() and never fault here; opted-in nodes fall
+ * through so the failure path below restores private PTE controls.
+ */
+
nid = folio_nid(folio);
nr_pages = folio_nr_pages(folio);
@@ -6078,6 +6085,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
/*
* Make it present again, depending on how arch implements
* non-accessible ptes, some can allow access by kernel mode.
+ *
+ * If the folio is still on a private node with NP_OPS_PROTECT_WRITE,
+ * enforce write-protection so the next write triggers handle_fault.
+ * This covers migration-failed and migration-skipped paths.
*/
if (unlikely(folio && folio_managed_wrprotect(folio))) {
writable = false;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8ac014950e88..8a3a9916ab59 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -861,7 +861,10 @@ bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
{
int nid;
- if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+ if (!folio || folio_test_ksm(folio))
+ return false;
+
+ if (unlikely(!folio_managed_allows_numa(folio)))
return false;
/* Also skip shared copy-on-write folios */
--
2.53.0