[PATCH RFC] mm: Avoid triggering oom-killer during memory hot-remove operations

From: Li Zhijian
Date: Fri Jul 26 2024 - 04:54:20 EST


When a process is bound to a node that is being hot-removed, any memory
allocation attempts restricted to that node should fail gracefully without
triggering the OOM killer. However, the current behavior can cause the
OOM killer to be invoked, leading to the termination of processes on other
nodes, even when there is sufficient memory available in the system.

Prevent the OOM killer from being invoked for allocations whose nodemask
is exactly the node undergoing hot-remove. Such allocations simply fail
instead, allowing the process to handle the failure appropriately without
disrupting the rest of the system.

Signed-off-by: Li Zhijian <lizhijian@xxxxxxxxxxx>
---
include/linux/memory_hotplug.h | 6 ++++++
mm/memory_hotplug.c | 21 +++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
3 files changed, 33 insertions(+)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 7a9ff464608d..0ca804215e11 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -332,6 +332,7 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
extern int remove_memory(u64 start, u64 size);
extern void __remove_memory(u64 start, u64 size);
extern int offline_and_remove_memory(u64 start, u64 size);
+bool is_offlining_node(nodemask_t nodes);

#else
static inline void try_offline_node(int nid) {}
@@ -348,6 +349,11 @@ static inline int remove_memory(u64 start, u64 size)
}

static inline void __remove_memory(u64 start, u64 size) {}
+
+static inline bool is_offlining_node(nodemask_t nodes)
+{
+ return false; /* hot-remove is compiled out, so no node is ever offlining */
+}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 431b1f6753c0..da3982751ba9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1938,6 +1938,37 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
return 0;
}

+/*
+ * Node whose memory is currently being offlined by offline_pages(), or
+ * NODE_MASK_NONE when no offline operation is in progress.
+ */
+static nodemask_t offlining_node = NODE_MASK_NONE;
+
+/*
+ * is_offlining_node - check whether @nodes is exactly the node being offlined
+ * @nodes: nodemask an allocation is restricted to
+ *
+ * Returns true only while an offline operation is in progress and @nodes
+ * equals the set of offlining nodes; an empty @nodes never matches.
+ */
+bool is_offlining_node(nodemask_t nodes)
+{
+ return !nodes_empty(offlining_node) &&
+        nodes_equal(offlining_node, nodes);
+}
+
+/* Record @node as being offlined; cleared again by offline_pages_end(). */
+static void offline_pages_start(int node)
+{
+ node_set(node, offlining_node);
+}
+
+/* Forget any offlining node; called on both success and failure paths. */
+static void offline_pages_end(void)
+{
+ offlining_node = NODE_MASK_NONE;
+}
+
/*
* Must be called with mem_hotplug_lock in write mode.
*/
@@ -1991,6 +2007,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
goto failed_removal;
}

+ offline_pages_start(node);
/*
* Disable pcplists so that page isolation cannot race with freeing
* in a way that pages from isolated pageblock are left on pcplists.
@@ -2107,6 +2124,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,

memory_notify(MEM_OFFLINE, &arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
+ offline_pages_end();
+
return 0;

failed_removal_isolated:
@@ -2121,6 +2140,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
+
+ offline_pages_end();
return ret;
}

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1780df31d5f5..acdab6b114a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3563,6 +3563,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (page)
goto out;

+ /*
+ * Memory hot-remove is in progress and this allocation is restricted
+ * to the node being removed, so it is expected to fail: do not invoke
+ * the OOM killer. ac->nodemask may be NULL (no node restriction).
+ */
+ if (ac->nodemask && is_offlining_node(*ac->nodemask))
+ goto out;
+
/* Coredumps can quickly deplete all memory reserves */
if (current->flags & PF_DUMPCORE)
goto out;
--
2.29.2