[PATCH 11/15] mm: handle ANON_VMA_LAZY in huge page operations
From: tao
Date: Wed May 27 2026 - 07:14:05 EST
Splitting a huge page converts the folio into multiple subpages, and
holding only folio_lock(folio) cannot guarantee that the split operation
completes atomically.
Check for an ANON_VMA_LAZY anon_vma and upgrade it to a regular anon_vma
when a fault may install a huge page and when khugepaged collapses a huge
page, so that the anon_vma is properly protected.
Signed-off-by: tao <tao.wangtao@xxxxxxxxx>
---
mm/internal.h | 5 +++++
mm/khugepaged.c | 5 +++++
mm/memory.c | 17 +++++++++++++----
mm/rmap.c | 15 +++++++++++----
4 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 0a36eba3f63c..a746f5272aa6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -419,6 +419,11 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma);
int __anon_vma_prepare(struct vm_area_struct *vma);
/* Called on first anon fault or from anon_vma_prepare(). */
void vma_prepare_anon_vma_lazy(struct vm_area_struct *vma);
+/*
+ * Upgrade a VMA's ANON_VMA_LAZY anon_vma to a regular anon_vma. Used during
+ * fork, when cloning an ANON_VMA_TREE_PARENT, and for huge page operations.
+ */
+int vma_upgrade_anon_vma_lazy(struct vm_area_struct *vma);
void unlink_anon_vmas(struct vm_area_struct *vma);
static inline int anon_vma_prepare(struct vm_area_struct *vma)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 747748eace91..a33cda026be7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1164,6 +1164,11 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
if (result != SCAN_SUCCEED)
goto out_up_write;
+ /* Upgrade anon_vma_lazy to protect the anon_vma. */
+ if (vma_upgrade_anon_vma_lazy(vma)) {
+ result = SCAN_FAIL;
+ goto out_up_write;
+ }
anon_vma_tree_lock_write(vma->anon_vma);
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address,
diff --git a/mm/memory.c b/mm/memory.c
index 8fd3877f69fb..26d116b3393c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3819,19 +3819,28 @@ vm_fault_t __vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret = 0;
+ bool maybe_huge = pmd_none(*vmf->pmd);
- if (likely(vma->anon_vma))
- return 0;
- if (anon_vma_lazy_enabled()) {
+ if (likely(vma->anon_vma)) {
+ if (!vma_is_anon_vma_lazy(vma) || !maybe_huge)
+ return 0;
+ }
+#ifdef CONFIG_ANON_VMA_LAZY
+ if (anon_vma_lazy_enabled() && !maybe_huge) {
vma_prepare_anon_vma_lazy(vma);
return 0;
}
+#endif
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
if (!mmap_read_trylock(vma->vm_mm))
return VM_FAULT_RETRY;
}
- if (__anon_vma_prepare(vma))
+ if (!vma->anon_vma && __anon_vma_prepare(vma))
+ ret = VM_FAULT_OOM;
+#ifdef CONFIG_ANON_VMA_LAZY
+ if (vma->anon_vma && maybe_huge && vma_upgrade_anon_vma_lazy(vma))
ret = VM_FAULT_OOM;
+#endif
if (vmf->flags & FAULT_FLAG_VMA_LOCK)
mmap_read_unlock(vma->vm_mm);
return ret;
diff --git a/mm/rmap.c b/mm/rmap.c
index d9424f4eb6d0..57cd85efc50a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -452,13 +452,20 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
return vma_prepare_anon_vma(vma, false, NULL);
}
-static int vma_upgrade_anon_vma_lazy(struct vm_area_struct *vma)
+/**
+ * vma_upgrade_anon_vma_lazy - upgrade a VMA's lazy anon_vma to a regular one
+ * @vma: the VMA whose anon_vma_lazy is being upgraded
+ */
+int vma_upgrade_anon_vma_lazy(struct vm_area_struct *vma)
{
- anon_vma_tree_t vma_tree = vma->anon_vma;
+ anon_vma_tree_t anon_tree = READ_ONCE(vma->anon_vma);
struct anon_vma *parent_anon_vma = NULL;
- if (anon_vma_tree_is_parent(vma_tree))
- parent_anon_vma = anon_vma_tree_anon_vma(vma_tree);
+ VM_BUG_ON_VMA(!anon_tree, vma);
+ if (!anon_vma_tree_type(anon_tree))
+ return 0;
+ if (anon_vma_tree_is_parent(anon_tree))
+ parent_anon_vma = anon_vma_tree_anon_vma(anon_tree);
return vma_prepare_anon_vma(vma, true, parent_anon_vma);
}
--
2.17.1