[PATCH v4 5/9] mm/memory_hotplug: add multi-range hotunplug
From: Gregory Price
Date: Fri Jun 05 2026 - 17:23:02 EST
offline_and_remove_memory() handles a single contiguous range.
Callers that manage a device composed of several ranges (e.g. dax/kmem)
currently have to call it in a loop, which gives up atomicity.
This creates a race condition where another daemon can online a block
that was just offlined while other blocks are being offlined, causing
the eventual (original) unplug operation to fail.
Add offline_and_remove_memory_ranges(), which takes an array of ranges
and processes them as one operation under a single lock_device_hotplug():
- Phase 1 offlines every block of every range, remembering each block's
previous online type.
- Phase 2 removes the ranges only once all of them are offline.
- If any offline fails, the offlining done so far is reverted and
nothing is removed.
This gives callers all-or-nothing semantics for the offline step, so a
failed or interrupted unplug leaves every range online as before rather
than in an inconsistent partially-removed state.
Suggested-by: David Hildenbrand (Arm) <david@xxxxxxxxxx>
Signed-off-by: Gregory Price <gourry@xxxxxxxxxx>
---
include/linux/memory_hotplug.h | 7 +++
mm/memory_hotplug.c | 95 ++++++++++++++++++++++++++++++++++
2 files changed, 102 insertions(+)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d3edeb80aadb..7f1da7c428dc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -267,6 +267,7 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
extern int remove_memory(u64 start, u64 size);
extern void __remove_memory(u64 start, u64 size);
extern int offline_and_remove_memory(u64 start, u64 size);
+int offline_and_remove_memory_ranges(const struct range *ranges, int nr_ranges);
#else
static inline void try_offline_node(int nid) {}
@@ -283,6 +284,12 @@ static inline int remove_memory(u64 start, u64 size)
}
static inline void __remove_memory(u64 start, u64 size) {}
+
+static inline int offline_and_remove_memory_ranges(const struct range *ranges,
+ int nr_ranges)
+{
+ return -EBUSY;
+}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7d145217adc6..e486d35c22b2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -2483,4 +2483,99 @@ int offline_and_remove_memory(u64 start, u64 size)
return rc;
}
EXPORT_SYMBOL_GPL(offline_and_remove_memory);
+
+/**
+ * offline_and_remove_memory_ranges - offline and remove multiple memory ranges
+ * @ranges: array of physical address ranges to offline and remove
+ * @nr_ranges: number of entries in @ranges
+ *
+ * Offline and remove several memory ranges as one operation, serialized
+ * against other hotplug operations by a single lock_device_hotplug().
+ *
+ * Unlike calling offline_and_remove_memory() in a loop, this offlines *all*
+ * ranges before removing any of them. If offlining any range fails, the
+ * offlining of the ranges processed so far is reverted and nothing is
+ * removed, leaving every range online as it was before the call. This gives
+ * callers all-or-nothing semantics for the offline step, so a failed unplug
+ * does not leave a device split between online and removed ranges.
+ *
+ * Each range must be memory-block aligned in start and size.
+ *
+ * Return: 0 on success, negative errno otherwise. On failure no range has
+ * been removed.
+ */
+int offline_and_remove_memory_ranges(const struct range *ranges, int nr_ranges)
+{
+ unsigned long mb_total = 0;
+ uint8_t *online_types, *tmp;
+ int i, rc = 0;
+
+ if (!ranges || nr_ranges <= 0)
+ return -EINVAL;
+
+ for (i = 0; i < nr_ranges; i++) {
+ u64 start = ranges[i].start;
+ u64 size = range_len(&ranges[i]);
+
+ if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
+ !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
+ return -EINVAL;
+ mb_total += size / memory_block_size_bytes();
+ }
+
+ /*
+ * Remember the old online type of every memory block across all ranges,
+ * so we can revert if offlining a later block fails. All entries start
+ * as MMOP_OFFLINE so blocks we never touched are skipped on rollback.
+ */
+ online_types = kmalloc_array(mb_total, sizeof(*online_types),
+ GFP_KERNEL);
+ if (!online_types)
+ return -ENOMEM;
+ memset(online_types, MMOP_OFFLINE, mb_total);
+
+ lock_device_hotplug();
+
+ /* Phase 1: offline every block in every range. */
+ tmp = online_types;
+ for (i = 0; i < nr_ranges; i++) {
+ rc = walk_memory_blocks(ranges[i].start, range_len(&ranges[i]),
+ &tmp, try_offline_memory_block);
+ if (rc)
+ break;
+ }
+
+ /*
+ * Phase 2: only once everything is offline, remove it. This cannot
+ * fail as the memory can no longer be onlined in the meantime.
+ */
+ if (!rc) {
+ for (i = 0; i < nr_ranges; i++) {
+ rc = try_remove_memory(ranges[i].start,
+ range_len(&ranges[i]));
+ if (rc) {
+ pr_err("%s: Failed to remove memory: %d",
+ __func__, rc);
+ break;
+ }
+ }
+ }
+
+ /*
+ * Roll back the offlining if anything failed. Blocks we never offlined
+ * are marked MMOP_OFFLINE and skipped by try_reonline_memory_block().
+ */
+ if (rc) {
+ tmp = online_types;
+ for (i = 0; i < nr_ranges; i++)
+ walk_memory_blocks(ranges[i].start,
+ range_len(&ranges[i]), &tmp,
+ try_reonline_memory_block);
+ }
+ unlock_device_hotplug();
+
+ kfree(online_types);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(offline_and_remove_memory_ranges);
#endif /* CONFIG_MEMORY_HOTREMOVE */
--
2.54.0