[RFC PATCH] mm: batch link_file_vma calls in dup_mmap

From: Yibin Liu

Date: Tue Jun 16 2026 - 05:18:14 EST


Forking a process with many file-backed mappings of the same file
(e.g. a dynamically linked binary with several mappings of the same
shared library) makes dup_mmap() acquire and release that mapping's
i_mmap_rwsem once per vma, as each vma is inserted into the
address_space interval tree.

Mirror the unlink_file_vma_batch mechanism added for free_pgd_range()
by commit 3577dbb19241 ("mm: batch unlink_file_vma calls in
free_pgd_range") and apply the same idea on the vma creation side:
introduce link_vma_file_batch, which gathers consecutive vmas backed
by the same file and inserts them into the interval tree under a
single i_mmap_lock_write()/i_mmap_unlock_write() pair instead of one
pair per vma.

Unlike removal on the unlink side, insertion with
vma_interval_tree_insert_after() needs both the new vma and the vma it
is inserted after, so the batch keeps a parallel old_vmas[] array
alongside new_vmas[] rather than the single vmas[] array used by
unlink_vma_file_batch.

link_file_vma_batch_add() replaces the inline
i_mmap_lock_write()/vma_interval_tree_insert_after() sequence in
dup_mmap()'s vma copy loop, so the interval tree insertion itself is
deferred until the batch is flushed. link_file_vma_batch_final() is
called at the loop_out label and flushes any pending batch both on the
successful loop exit and on every error path that jumps to loop_out,
so once the loop has been left, every vma already linked into the
maple tree has also been inserted into the interval tree.

Tested with the same doexec benchmark used by 3577dbb19241:
http://apollo.backplane.com/DFlyMisc/doexec.c

$ cc -O2 -o shared-doexec doexec.c
$ ./shared-doexec $(nproc)

On an AMD EPYC 9754 system with 512 threads, execs per second improved
by roughly 2%-7% over the unpatched kernel across repeated runs.

Signed-off-by: Yibin Liu <liuyibin@xxxxxxxx>
---
mm/mmap.c | 14 ++++----------
mm/vma.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
mm/vma.h | 14 ++++++++++++++
3 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 2311ae7c2..d5a4312df 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1735,6 +1735,7 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
unsigned long charge = 0;
LIST_HEAD(uf);
VMA_ITERATOR(vmi, mm, 0);
+ struct link_vma_file_batch vb;

if (mmap_write_lock_killable(oldmm))
return -EINTR;
@@ -1758,6 +1759,7 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (unlikely(retval))
goto out;

+ link_file_vma_batch_init(&vb);
mt_clear_in_rcu(vmi.mas.tree);
for_each_vma(vmi, mpnt) {
struct file *file;
@@ -1822,18 +1824,9 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)

file = tmp->vm_file;
if (file) {
- struct address_space *mapping = file->f_mapping;
-
get_file(file);
- i_mmap_lock_write(mapping);
- if (vma_is_shared_maywrite(tmp))
- mapping_allow_writable(mapping);
- flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
- vma_interval_tree_insert_after(tmp, mpnt,
- &mapping->i_mmap);
- flush_dcache_mmap_unlock(mapping);
- i_mmap_unlock_write(mapping);
+ link_file_vma_batch_add(&vb, tmp, mpnt);
}

if (!(tmp->vm_flags & VM_WIPEONFORK))
@@ -1847,6 +1840,7 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
loop_out:
+ link_file_vma_batch_final(&vb);
vma_iter_free(&vmi);
if (!retval) {
mt_set_in_rcu(vmi.mas.tree);
diff --git a/mm/vma.c b/mm/vma.c
index 9eea28508..59343eaad 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1827,6 +1827,55 @@ void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
unlink_file_vma_batch_process(vb);
}

+void link_file_vma_batch_init(struct link_vma_file_batch *vb)
+{ /* Start (or restart) a batch with no queued vmas. */
+ vb->count = 0; /* array slots are left untouched; only entries below count are read */
+}
+
+static void link_file_vma_batch_process(struct link_vma_file_batch *vb)
+{ /* Flush: insert every queued vma under one i_mmap write lock. Needs count > 0. */
+ struct address_space *mapping;
+ int i;
+
+ mapping = vb->new_vmas[0]->vm_file->f_mapping; /* slot 0 is valid: both callers flush only a non-empty batch */
+ i_mmap_lock_write(mapping); /* taken once for the whole batch */
+ for (i = 0; i < vb->count; i++) {
+ VM_WARN_ON_ONCE(vb->new_vmas[i]->vm_file->f_mapping != mapping); /* batch is grouped by vm_file, so mappings should match */
+ if (vma_is_shared_maywrite(vb->new_vmas[i]))
+ mapping_allow_writable(mapping);
+ flush_dcache_mmap_lock(mapping);
+ vma_interval_tree_insert_after(vb->new_vmas[i], vb->old_vmas[i],
+ &mapping->i_mmap); /* new_vmas[i] goes right after its paired old_vmas[i] */
+ flush_dcache_mmap_unlock(mapping);
+ }
+ i_mmap_unlock_write(mapping);
+
+ link_file_vma_batch_init(vb); /* reset so the batch can be refilled */
+}
+
+void link_file_vma_batch_add(struct link_vma_file_batch *vb,
+ struct vm_area_struct *new_vma,
+ struct vm_area_struct *old_vma)
+{ /* Queue new_vma for insertion after old_vma; the insertion happens at the next flush. */
+ if (new_vma->vm_file == NULL) /* a vma without a file is not queued */
+ return;
+
+ if ((vb->count > 0 && vb->new_vmas[0]->vm_file != new_vma->vm_file) ||
+ vb->count == ARRAY_SIZE(vb->new_vmas))
+ link_file_vma_batch_process(vb); /* flush first: different file than the queued vmas, or arrays full */
+
+ vb->new_vmas[vb->count] = new_vma;
+ vb->old_vmas[vb->count] = old_vma; /* same index as new_vma so the pair stays together */
+ vb->count++;
+}
+
+void link_file_vma_batch_final(struct link_vma_file_batch *vb)
+{ /* Flush whatever is still queued; does nothing for an empty batch. */
+ if (vb->count > 0)
+ link_file_vma_batch_process(vb);
+}
+
+
static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock)
{
struct file *file = vma->vm_file;
diff --git a/mm/vma.h b/mm/vma.h
index 8e4b61a73..4446024fb 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -28,6 +28,12 @@ struct unlink_vma_file_batch {
struct vm_area_struct *vmas[8];
};

+struct link_vma_file_batch { /* vmas queued for interval tree insertion */
+ int count; /* number of valid entries in both arrays */
+ struct vm_area_struct *new_vmas[8]; /* vmas waiting to be inserted */
+ struct vm_area_struct *old_vmas[8]; /* old_vmas[i] is the vma that new_vmas[i] is inserted after */
+};
+
/*
* vma munmap operation
*/
@@ -447,6 +453,14 @@ void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
struct vm_area_struct *vma);

+void link_file_vma_batch_init(struct link_vma_file_batch *vb);
+
+void link_file_vma_batch_final(struct link_vma_file_batch *vb);
+
+void link_file_vma_batch_add(struct link_vma_file_batch *vb,
+ struct vm_area_struct *new_vma,
+ struct vm_area_struct *old_vma);
+
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
--
2.34.1