[RFC PATCH 2/8] mm: add boot-time reserved THP pageblock capacity
From: Qi Zheng
Date: Sat Jun 27 2026 - 03:25:35 EST
From: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
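Add the kernel boot parameters "thp_reserved_size" and "thp_reserved_nr"
to allow reserving a specified number of THP pageblocks during system boot.
"thp_reserved_nr" is the number of huge pages to reserve, and
"thp_reserved_size" is their size, which currently must equal the PMD THP
size. These reserved pageblocks are marked as MIGRATE_RESERVED_THP.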
Additionally, expose the "total_hpages", "free_hpages", and "used_hpages"
nodes in sysfs (/sys/kernel/mm/reserved_thp/) to allow userspace to
monitor the usage of the reserved capacity.
Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
mm/Makefile | 2 +-
mm/internal.h | 2 +
mm/page_alloc.c | 29 ++++++++++++++
mm/reserved_thp.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 130 insertions(+), 1 deletion(-)
create mode 100644 mm/reserved_thp.c
diff --git a/mm/Makefile b/mm/Makefile
index eff9f9e7e061c..fd74a7392e346 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -98,7 +98,7 @@ obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o reserved_thp.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_LIVEUPDATE_MEMFD) += memfd_luo.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
diff --git a/mm/internal.h b/mm/internal.h
index 181e79f1d6a20..a76a1fad2a7fd 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1951,4 +1951,6 @@ static inline int get_sysctl_max_map_count(void)
bool may_expand_vm(struct mm_struct *mm, const vma_flags_t *vma_flags,
unsigned long npages);
+unsigned long reserved_thp_pageblocks(unsigned long nr_hpages);	/* defined in mm/page_alloc.c */
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 613a711305072..23dbbef444f18 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3568,6 +3568,35 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
return false;
}
+/*
+ * reserved_thp_pageblocks - set pageblocks aside as MIGRATE_RESERVED_THP
+ * @nr_hpages: number of PMD-sized huge pages the caller wants reserved
+ *
+ * Each round allocates one compound page of order
+ * max(HPAGE_PMD_ORDER, pageblock_order), calls change_pageblock_range() to
+ * switch that range to MIGRATE_RESERVED_THP and adds its base pages to
+ * zone->nr_reserved_thp, both under zone->lock, and then frees the page
+ * again.  The loop stops at the first allocation failure.
+ *
+ * Return: number of huge pages covered by the blocks that were retagged.
+ * Progress is made in whole blocks, so the result can be lower than
+ * @nr_hpages (allocation failure) or overshoot it when one block holds more
+ * than one huge page.
+ */
+unsigned long reserved_thp_pageblocks(unsigned long nr_hpages)
+{
+	const gfp_t gfp_mask = GFP_HIGHUSER | __GFP_COMP | __GFP_NOMEMALLOC |
+			       __GFP_NOWARN | __GFP_NORETRY;
+	unsigned int block_order = max_t(unsigned int, HPAGE_PMD_ORDER,
+					 pageblock_order);
+	unsigned long hpages_per_block = 1UL << (block_order - HPAGE_PMD_ORDER);
+	unsigned long nr_done;
+
+	for (nr_done = 0; nr_done < nr_hpages; nr_done += hpages_per_block) {
+		unsigned long irq_flags;
+		struct zone *zone;
+		struct page *block;
+
+		block = alloc_pages(gfp_mask, block_order);
+		if (!block)
+			break;
+
+		zone = page_zone(block);
+
+		spin_lock_irqsave(&zone->lock, irq_flags);
+		change_pageblock_range(block, block_order, MIGRATE_RESERVED_THP);
+		zone->nr_reserved_thp += 1UL << block_order;
+		spin_unlock_irqrestore(&zone->lock, irq_flags);
+
+		/* Hand the block back to the allocator now that it is retagged. */
+		__free_pages(block, block_order);
+	}
+
+	return nr_done;
+}
+
static inline long __zone_watermark_unusable_free(struct zone *z,
unsigned int order, unsigned int alloc_flags)
{
diff --git a/mm/reserved_thp.c b/mm/reserved_thp.c
new file mode 100644
index 0000000000000..1eee4f39b9d69
--- /dev/null
+++ b/mm/reserved_thp.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mm.h>
+#include "internal.h"
+
+static DEFINE_SPINLOCK(reserved_thp_lock);	/* taken by free_hpages_show() around total/used */
+
+static unsigned long reserved_thp_cmdline_size __initdata = HPAGE_PMD_SIZE;	/* thp_reserved_size= value */
+static bool reserved_thp_cmdline_size_valid __initdata = true;	/* false after a rejected thp_reserved_size= */
+static unsigned long reserved_thp_requested __initdata;	/* thp_reserved_nr= value */
+static unsigned long reserved_thp_total;	/* result of the boot-time reservation; sysfs total_hpages */
+static unsigned long reserved_thp_used;	/* sysfs used_hpages; no writer visible in this patch */
+
+/*
+ * Parse "thp_reserved_size=<size>".
+ *
+ * Only the PMD THP size is accepted.  Any other value, or a missing or
+ * malformed one, marks the size as invalid, which makes reserved_thp_init()
+ * skip the boot-time reservation.
+ *
+ * Return: 0 on success, -EINVAL otherwise.
+ */
+static int __init setup_reserved_thp_size(char *str)
+{
+	unsigned long long size;
+	char *end;
+
+	/* early_param handlers get NULL when the option has no "=value". */
+	if (!str) {
+		pr_warn("thp_reserved_size requires a value, only %lu is supported\n",
+			HPAGE_PMD_SIZE);
+		reserved_thp_cmdline_size_valid = false;
+		return -EINVAL;
+	}
+
+	/*
+	 * memparse() returns unsigned long long; keep the full width so an
+	 * oversized value cannot be truncated into a match, and use the end
+	 * pointer to reject trailing garbage such as "2Mfoo".
+	 */
+	size = memparse(str, &end);
+	if (*end || size != HPAGE_PMD_SIZE) {
+		pr_warn("unsupported thp_reserved_size=%s, only %lu is supported\n",
+			str, HPAGE_PMD_SIZE);
+		reserved_thp_cmdline_size_valid = false;
+		return -EINVAL;
+	}
+
+	reserved_thp_cmdline_size = size;
+	reserved_thp_cmdline_size_valid = true;
+	return 0;
+}
+early_param("thp_reserved_size", setup_reserved_thp_size);
+
+/*
+ * Parse "thp_reserved_nr=<count>", the number of huge pages to reserve at
+ * boot.  The value must be a plain decimal number that fits in an unsigned
+ * long; anything else leaves the request at 0 so nothing is reserved.
+ *
+ * Return: 0 on success, -EINVAL otherwise.
+ */
+static int __init setup_reserved_thp_nr(char *str)
+{
+	unsigned long nr;
+
+	/*
+	 * early_param handlers get NULL when the option has no "=value".
+	 * kstrtoul() rejects trailing garbage and, unlike sscanf("%lu"),
+	 * reports overflow instead of silently wrapping.
+	 */
+	if (!str || kstrtoul(str, 10, &nr)) {
+		pr_warn("invalid thp_reserved_nr=%s\n", str ?: "");
+		reserved_thp_requested = 0;
+		return -EINVAL;
+	}
+
+	reserved_thp_requested = nr;
+	return 0;
+}
+early_param("thp_reserved_nr", setup_reserved_thp_nr);
+
+/* sysfs read handler: emit the current value of reserved_thp_total. */
+static ssize_t total_hpages_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	unsigned long total = READ_ONCE(reserved_thp_total);
+
+	return sysfs_emit(buf, "%lu\n", total);
+}
+
+/*
+ * sysfs read handler: emit reserved_thp_total - reserved_thp_used.
+ *
+ * Both counters are sampled while holding reserved_thp_lock.
+ * NOTE(review): presumably writers of reserved_thp_used take the same lock;
+ * none are visible in this patch.
+ */
+static ssize_t free_hpages_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	unsigned long total, used;
+
+	spin_lock(&reserved_thp_lock);
+	total = reserved_thp_total;
+	used = reserved_thp_used;
+	spin_unlock(&reserved_thp_lock);
+
+	return sysfs_emit(buf, "%lu\n", total - used);
+}
+
+/* sysfs read handler: emit the current value of reserved_thp_used. */
+static ssize_t used_hpages_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	unsigned long used = READ_ONCE(reserved_thp_used);
+
+	return sysfs_emit(buf, "%lu\n", used);
+}
+
+static struct kobj_attribute total_hpages_attr = __ATTR_RO(total_hpages);	/* read-only, total_hpages_show() */
+static struct kobj_attribute free_hpages_attr = __ATTR_RO(free_hpages);	/* read-only, free_hpages_show() */
+static struct kobj_attribute used_hpages_attr = __ATTR_RO(used_hpages);	/* read-only, used_hpages_show() */
+
+static struct attribute *reserved_thp_attrs[] = {	/* NULL-terminated list for the group below */
+ &total_hpages_attr.attr,
+ &free_hpages_attr.attr,
+ &used_hpages_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group reserved_thp_attr_group = {	/* registered by reserved_thp_init() */
+ .attrs = reserved_thp_attrs,
+};
+
+/*
+ * Perform the reservation requested on the command line, if any.
+ *
+ * Nothing happens when no count was requested or when thp_reserved_size=
+ * was rejected during early parameter parsing.
+ */
+static void __init reserved_thp_boot_reserve(void)
+{
+	if (!reserved_thp_requested || !reserved_thp_cmdline_size_valid)
+		return;
+
+	reserved_thp_total = reserved_thp_pageblocks(reserved_thp_requested);
+	pr_info("reserved %lu/%lu PMD THP pageblocks (%lu bytes each)\n",
+		reserved_thp_total, reserved_thp_requested,
+		reserved_thp_cmdline_size);
+}
+
+/*
+ * Boot-time setup: run the requested reservation, then create the
+ * "reserved_thp" kobject under mm_kobj and attach the attribute group to it.
+ * The sysfs directory is created whether or not anything was reserved.
+ *
+ * Return: 0 on success, -ENOMEM if the kobject cannot be created, or the
+ * error from sysfs_create_group().
+ */
+static int __init reserved_thp_init(void)
+{
+	struct kobject *dir;
+	int err;
+
+	reserved_thp_boot_reserve();
+
+	dir = kobject_create_and_add("reserved_thp", mm_kobj);
+	if (!dir)
+		return -ENOMEM;
+
+	err = sysfs_create_group(dir, &reserved_thp_attr_group);
+	if (!err)
+		return 0;
+
+	/* Drop the reference taken by kobject_create_and_add(). */
+	kobject_put(dir);
+	return err;
+}
+subsys_initcall(reserved_thp_init);
\ No newline at end of file
--
2.54.0