Re: [PATCH v2 1/8] mm/rmap: improve anon_vma_clone(), unlink_anon_vmas() comments, add asserts
From: Andrew Morton
Date: Wed Jan 14 2026 - 16:59:13 EST
On Wed, 14 Jan 2026 19:02:20 +0000 Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> wrote:
> Can you apply the below fix-patch to this to fix up a rather silly
> failure-to-unlock mistake that Suren picked up on?
>
> Luckily this partial unmap function is unlikely to ever be triggerable in real
> life, AND more to the point - a later patch completely eliminates the locking -
> but to avoid bisection hazard let's fix this.
>
> Note that there is a conflict at 'mm/rmap: allocate anon_vma_chain objects
> unlocked when possible', please resolve it by just taking that patch and
> dropping _everything_ from this one _including_ the trailing 'if (root) ...'
> code.
No probs.
mm-rmap-allocate-anon_vma_chain-objects-unlocked-when-possible.patch is
now
--- a/mm/rmap.c~mm-rmap-allocate-anon_vma_chain-objects-unlocked-when-possible
+++ a/mm/rmap.c
@@ -147,14 +147,13 @@ static void anon_vma_chain_free(struct a
kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}
-static void anon_vma_chain_link(struct vm_area_struct *vma,
- struct anon_vma_chain *avc,
- struct anon_vma *anon_vma)
+static void anon_vma_chain_assign(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
{
avc->vma = vma;
avc->anon_vma = anon_vma;
list_add(&avc->same_vma, &vma->anon_vma_chain);
- anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
/**
@@ -211,7 +210,8 @@ int __anon_vma_prepare(struct vm_area_st
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
- anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma_chain_assign(vma, avc, anon_vma);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
@@ -292,21 +292,31 @@ int anon_vma_clone(struct vm_area_struct
check_anon_vma_clone(dst, src);
- /* All anon_vma's share the same root. */
+ /*
+ * Allocate AVCs. We don't need an anon_vma lock for this as we
+ * are not updating the anon_vma rbtree nor are we changing
+ * anon_vma statistics.
+ *
+ * We hold the exclusive mmap write lock so there's no possibility of
+ * the unlinked AVC's being observed yet.
+ */
+ list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+ avc = anon_vma_chain_alloc(GFP_KERNEL);
+ if (!avc)
+ goto enomem_failure;
+
+ anon_vma_chain_assign(dst, avc, pavc->anon_vma);
+ }
+
+ /*
+ * Now link the anon_vma's back to the newly inserted AVCs.
+ * Note that all anon_vma's share the same root.
+ */
anon_vma_lock_write(src->anon_vma);
- list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
- struct anon_vma *anon_vma;
+ list_for_each_entry_reverse(avc, &dst->anon_vma_chain, same_vma) {
+ struct anon_vma *anon_vma = avc->anon_vma;
- avc = anon_vma_chain_alloc(GFP_NOWAIT);
- if (unlikely(!avc)) {
- anon_vma_unlock_write(src->anon_vma);
- avc = anon_vma_chain_alloc(GFP_KERNEL);
- if (!avc)
- goto enomem_failure;
- anon_vma_lock_write(src->anon_vma);
- }
- anon_vma = pavc->anon_vma;
- anon_vma_chain_link(dst, avc, anon_vma);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
/*
* Reuse existing anon_vma if it has no vma and only one
@@ -322,7 +332,6 @@ int anon_vma_clone(struct vm_area_struct
}
if (dst->anon_vma)
dst->anon_vma->num_active_vmas++;
-
anon_vma_unlock_write(src->anon_vma);
return 0;
@@ -384,8 +393,10 @@ int anon_vma_fork(struct vm_area_struct
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
+ anon_vma_chain_assign(vma, avc, anon_vma);
+ /* Now let rmap see it. */
anon_vma_lock_write(anon_vma);
- anon_vma_chain_link(vma, avc, anon_vma);
+ anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
@@ -402,40 +413,18 @@ int anon_vma_fork(struct vm_area_struct
* In the unfortunate case of anon_vma_clone() failing to allocate memory we
* have to clean things up.
*
- * On clone we hold the exclusive mmap write lock, so we can't race
- * unlink_anon_vmas(). Since we're cloning, we know we can't have empty
- * anon_vma's, since existing anon_vma's are what we're cloning from.
- *
- * So this function needs only traverse the anon_vma_chain and free each
- * allocated anon_vma_chain.
+ * Since we allocate anon_vma_chain's before we insert them into the interval
+ * trees, we simply have to free up the AVC's and remove the entries from the
+ * VMA's anon_vma_chain.
*/
static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc, *next;
- struct anon_vma *root = NULL;
-
- /*
- * We exclude everybody else from being able to modify anon_vma's
- * underneath us.
- */
- mmap_assert_locked(vma->vm_mm);
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
- struct anon_vma *anon_vma = avc->anon_vma;
-
- /* All anon_vma's share the same root. */
- if (!root) {
- root = anon_vma->root;
- anon_vma_lock_write(root);
- }
-
- anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
list_del(&avc->same_vma);
anon_vma_chain_free(avc);
}
-
- if (root)
- anon_vma_unlock_write(root);
}
/**
_