[RFC PATCH 7/8] mm: support reserved THP VMAs in anonymous faults

From: Qi Zheng

Date: Sat Jun 27 2026 - 03:28:30 EST


From: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>

Wire VM_RESERVED_THP into the anonymous PMD fault path.

For reserved THP VMAs, the faulting folio is requested with the
__GFP_RESERVED_THP flag, restricting the allocation to reserved THP
pageblocks. The resulting folio remains a normal anonymous THP, using
the existing reclaim, swap, and buddy paths.

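Additionally, reserved THP faults must either install a PMD-sized folio
or fail the fault; they never fall back. Read faults skip the huge zero
page unconditionally, and small anonymous pages are never used: an
unsuitable PMD range, a failed PMD-sized allocation, or reaching
do_anonymous_page() each return VM_FAULT_OOM instead of
VM_FAULT_FALLBACK.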

Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
include/linux/huge_mm.h | 4 +++-
mm/huge_memory.c | 18 +++++++++++++-----
mm/memory.c | 3 +++
3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad20f7f8c1794..4fe9651cd86b5 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -330,6 +330,8 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,

if (vm_flags & VM_HUGEPAGE)
mask |= READ_ONCE(huge_anon_orders_madvise);
+ if (vm_flags & VM_RESERVED_THP)
+ mask |= BIT(PMD_ORDER);
if (hugepage_global_always() ||
((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
mask |= READ_ONCE(huge_anon_orders_inherit);
@@ -371,7 +373,7 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma,
* Are THPs disabled only for VMAs where we didn't get an explicit
* advise to use them?
*/
- if (vm_flags & VM_HUGEPAGE)
+ if (vm_flags & (VM_HUGEPAGE | VM_RESERVED_THP))
return false;
/*
* Forcing a collapse (e.g., madv_collapse), is a clear advice to
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2bccb0a53a0a6..66d85a2fa855f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1267,6 +1267,9 @@ static struct folio *vma_alloc_anon_folio_pmd(struct vm_area_struct *vma,
const int order = HPAGE_PMD_ORDER;
struct folio *folio;

+ if (vma->vm_flags & VM_RESERVED_THP)
+ gfp |= __GFP_RESERVED_THP;
+
folio = vma_alloc_folio(gfp, order, vma, addr & HPAGE_PMD_MASK);

if (unlikely(!folio)) {
@@ -1344,8 +1347,11 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
vm_fault_t ret = 0;

folio = vma_alloc_anon_folio_pmd(vma, vmf->address);
- if (unlikely(!folio))
+ if (unlikely(!folio)) {
+ if (vma->vm_flags & VM_RESERVED_THP)
+ return VM_FAULT_OOM;
return VM_FAULT_FALLBACK;
+ }

pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable)) {
@@ -1480,15 +1486,17 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
vm_fault_t ret;

if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
- return VM_FAULT_FALLBACK;
+ return (vma->vm_flags & VM_RESERVED_THP) ? VM_FAULT_OOM :
+ VM_FAULT_FALLBACK;
ret = vmf_anon_prepare(vmf);
if (ret)
return ret;
khugepaged_enter_vma(vma, vma->vm_flags);

- if (!(vmf->flags & FAULT_FLAG_WRITE) &&
- !mm_forbids_zeropage(vma->vm_mm) &&
- transparent_hugepage_use_zero_page()) {
+ if (!(vma->vm_flags & VM_RESERVED_THP) &&
+ !(vmf->flags & FAULT_FLAG_WRITE) &&
+ !mm_forbids_zeropage(vma->vm_mm) &&
+ transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
struct folio *zero_folio;
vm_fault_t ret;
diff --git a/mm/memory.c b/mm/memory.c
index ff338c2abe923..225fc1ae22386 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5297,6 +5297,9 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;

+ if (unlikely(vma->vm_flags & VM_RESERVED_THP))
+ return VM_FAULT_OOM;
+
/*
* Use pte_alloc() instead of pte_alloc_map(), so that OOM can
* be distinguished from a transient failure of pte_offset_map().
--
2.54.0