[RFC PATCH v4 21/27] mm/memory-failure: add memory_failure callback to node_private_ops

From: Gregory Price

Date: Sun Feb 22 2026 - 03:57:08 EST


Add a void memory_failure notification callback to struct
node_private_ops so services managing N_MEMORY_PRIVATE nodes are
notified when a page on their node experiences a hardware error.

The callback is notification only -- the kernel always proceeds with
standard hwpoison handling for online pages.

The notification hook fires after TestSetPageHWPoison succeeds and
before get_hwpoison_page, giving the service a chance to clean up.

Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
include/linux/node_private.h | 6 ++++++
mm/internal.h | 16 ++++++++++++++++
mm/memory-failure.c | 15 +++++++++++++++
3 files changed, 37 insertions(+)

diff --git a/include/linux/node_private.h b/include/linux/node_private.h
index 7a7438fb9eda..d2669f68ac20 100644
--- a/include/linux/node_private.h
+++ b/include/linux/node_private.h
@@ -113,6 +113,10 @@ struct node_reclaim_policy {
* watermark_boost lifecycle (kswapd will not clear it).
* If NULL, normal boost policy applies.
*
+ * @memory_failure: Notification of hardware error on a page on this node.
+ * [folio-referenced callback]
+ * Notification only, kernel always handles the failure.
+ *
* @flags: Operation exclusion flags (NP_OPS_* constants).
*
*/
@@ -127,6 +131,8 @@ struct node_private_ops {
vm_fault_t (*handle_fault)(struct folio *folio, struct vm_fault *vmf,
enum pgtable_level level);
void (*reclaim_policy)(int nid, struct node_reclaim_policy *policy);
+ void (*memory_failure)(struct folio *folio, unsigned long pfn,
+ int mf_flags);
unsigned long flags;
};

diff --git a/mm/internal.h b/mm/internal.h
index db32cb2d7a29..64467ca774f1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1608,6 +1608,22 @@ static inline void node_private_reclaim_policy(int nid,
}
#endif

+static inline void folio_managed_memory_failure(struct folio *folio,
+ unsigned long pfn,
+ int mf_flags)
+{
+ /* Zone device pages handle memory failure via dev_pagemap_ops */
+ if (folio_is_zone_device(folio))
+ return;
+ if (folio_is_private_node(folio)) {
+ const struct node_private_ops *ops =
+ folio_node_private_ops(folio);
+
+ if (ops && ops->memory_failure)
+ ops->memory_failure(folio, pfn, mf_flags);
+ }
+}
+
struct vm_struct *__get_vm_area_node(unsigned long size,
unsigned long align, unsigned long shift,
unsigned long vm_flags, unsigned long start,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c80c2907da33..79c91d44ec1e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2379,6 +2379,15 @@ int memory_failure(unsigned long pfn, int flags)
goto unlock_mutex;
}

+ /*
+ * Notify private-node services about the hardware error so they
+ * can update internal tracking (e.g., CXL poison lists, stop
+ * demoting to failing DIMMs). This is notification only -- the
+ * kernel proceeds with standard hwpoison handling regardless.
+ */
+ if (unlikely(page_is_private_managed(p)))
+ folio_managed_memory_failure(page_folio(p), pfn, flags);
+
/*
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
@@ -2825,6 +2834,12 @@ static int soft_offline_in_use_page(struct page *page)
return 0;
}

+ if (!folio_managed_allows_migrate(folio)) {
+ pr_info("%#lx: cannot migrate private node folio\n", pfn);
+ folio_put(folio);
+ return -EBUSY;
+ }
+
isolated = isolate_folio_to_list(folio, &pagelist);

/*
--
2.53.0