[RFC PATCH v3 1/3] mm: add __do_mmap() and vm_mmap_seal_remote()
From: Cong Wang
Date: Fri Jun 12 2026 - 20:16:07 EST
Add __do_mmap(), a variant of do_mmap() that installs the mapping into
a caller-supplied mm rather than current->mm. do_mmap() becomes a thin
wrapper that passes current->mm, so all existing callers and the public
do_mmap() signature are unchanged; the same split is applied in the
nommu do_mmap(). mmap_region() and __mmap_region() gain an mm argument
(mmap_region() is only called from __do_mmap(), and __mmap_region() only
from mmap_region()) so the target mm flows down to where the VMA is
inserted. __do_mmap() is mm-internal and is declared in mm/internal.h.
On top of that, add vm_mmap_seal_remote() in mm/util.c, a high-level
entry point that installs a sealed, read-only (PROT_READ) MAP_SHARED
file mapping at a fixed address (MAP_FIXED_NOREPLACE) in a
caller-specified mm. The intended consumer is seccomp_unotify, where an
unprivileged supervisor needs to install a sealed pinned memfd region in
a supervised task's address space without target-side cooperation (the
existing mseal-based pinned-memfd flow only works if the target installs
its own mmap+mseal during a trusted setup window, which is unavailable
for fork+execve sandbox wrappers).
The LSM and fsnotify permission hooks (security_mmap_file(),
fsnotify_mmap_perm()) run against current, i.e. the supervisor
installing the mapping, not the owner of the target mm. This matches
the supervisor-installs-into-target mental model and parallels
pidfd_getfd()'s cross-task fd install.
Cross-task authorization is left to the caller; this primitive
performs no ptrace_may_access check. The seccomp consumer gates
on listener-fd ownership.
Assisted-by: Claude:claude-opus-4.8
Signed-off-by: Cong Wang <cwang@xxxxxxxxxxxxxx>
---
include/linux/mm.h | 2 ++
mm/internal.h | 5 +++++
mm/mmap.c | 29 ++++++++++++++++++---------
mm/nommu.c | 12 ++++++++++-
mm/util.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++
mm/vma.c | 18 ++++++++---------
mm/vma.h | 6 +++---
7 files changed, 100 insertions(+), 22 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fc2acedf0b76..dd14a32f76d3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4118,6 +4118,8 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
struct list_head *uf);
+unsigned long vm_mmap_seal_remote(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff);
extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool unlock);
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..897c4e08e0b1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1437,6 +1437,11 @@ extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
+unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, unsigned long prot,
+ unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf);
+
extern void set_pageblock_order(void);
unsigned long reclaim_pages(struct list_head *folio_list);
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
diff --git a/mm/mmap.c b/mm/mmap.c
index 5754d1c36462..c9e437effd9c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -277,7 +277,7 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
}
/**
- * do_mmap() - Perform a userland memory mapping into the current process
+ * __do_mmap() - Perform a userland memory mapping into @mm's
* address space of length @len with protection bits @prot, mmap flags @flags
* (from which VMA flags will be inferred), and any additional VMA flags to
* apply @vm_flags. If this is a file-backed mapping then the file is specified
@@ -307,8 +307,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
* start of a VMA, rather only the start of a valid mapped range of length
* @len bytes, rounded down to the nearest page size.
*
- * The caller must write-lock current->mm->mmap_lock.
+ * The caller must write-lock @mm->mmap_lock. do_mmap() is the common
+ * wrapper that targets current->mm.
*
+ * @mm: The mm_struct to install the mapping into. The caller must hold a
+ * reference and write-lock its mmap_lock.
* @file: An optional struct file pointer describing the file which is to be
* mapped, if a file-backed mapping.
* @addr: If non-zero, hints at (or if @flags has MAP_FIXED set, specifies) the
@@ -333,13 +336,12 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
* Returns: Either an error, or the address at which the requested mapping has
* been performed.
*/
-unsigned long do_mmap(struct file *file, unsigned long addr,
- unsigned long len, unsigned long prot,
- unsigned long flags, vm_flags_t vm_flags,
- unsigned long pgoff, unsigned long *populate,
- struct list_head *uf)
+unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ vm_flags_t vm_flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf)
{
- struct mm_struct *mm = current->mm;
int pkey = 0;
*populate = 0;
@@ -557,7 +559,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= VM_NORESERVE;
}
- addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+ addr = mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);
if (!IS_ERR_VALUE(addr) &&
((vm_flags & VM_LOCKED) ||
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -565,6 +567,15 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
return addr;
}
+unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ vm_flags_t vm_flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf)
+{
+ return __do_mmap(current->mm, file, addr, len, prot, flags,
+ vm_flags, pgoff, populate, uf);
+}
+
unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/mm/nommu.c b/mm/nommu.c
index ed3934bc2de4..7f2136129c72 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1009,7 +1009,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
/*
* handle mapping creation for uClinux
*/
-unsigned long do_mmap(struct file *file,
+unsigned long __do_mmap(struct mm_struct *mm,
+ struct file *file,
unsigned long addr,
unsigned long len,
unsigned long prot,
@@ -1246,6 +1247,15 @@ unsigned long do_mmap(struct file *file,
return -ENOMEM;
}
+unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ vm_flags_t vm_flags, unsigned long pgoff,
+ unsigned long *populate, struct list_head *uf)
+{
+ return __do_mmap(current->mm, file, addr, len, prot, flags,
+ vm_flags, pgoff, populate, uf);
+}
+
unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/mm/util.c b/mm/util.c
index 3cc949a0b7ed..ecc2087f744a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -588,6 +588,56 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
return ret;
}
+/**
+ * vm_mmap_seal_remote - install a sealed PROT_READ MAP_SHARED file mapping
+ * into @mm, without target-side cooperation.
+ * @mm: Target mm; caller holds a reference (e.g. get_task_mm()).
+ * @file: Backing file.
+ * @addr: Page-aligned address (MAP_FIXED_NOREPLACE: -EEXIST if occupied).
+ * @len: Length in bytes (page-aligned).
+ * @pgoff: Page offset into @file.
+ *
+ * The VMA is created VM_SEALED, so it is immediately immutable against the
+ * target mm's owner and its CLONE_VM peers. LSM/fsnotify hooks run against
+ * %current; cross-task authorization is the caller's responsibility (no
+ * ptrace_may_access check).
+ *
+ * Returns: the mapped address on success, or a negative errno.
+ */
+unsigned long vm_mmap_seal_remote(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff)
+{
+ const unsigned long prot = PROT_READ;
+ const unsigned long flags = MAP_SHARED | MAP_FIXED_NOREPLACE;
+ loff_t off = (loff_t)pgoff << PAGE_SHIFT;
+ unsigned long ret;
+ unsigned long populate;
+ LIST_HEAD(uf);
+
+ if (WARN_ON_ONCE(!mm))
+ return -EINVAL;
+ if (!VM_SEALED) /* sealing unavailable (e.g. !CONFIG_64BIT) */
+ return -EOPNOTSUPP;
+
+ ret = security_mmap_file(file, prot, flags);
+ if (!ret)
+ ret = fsnotify_mmap_perm(file, prot, off, len);
+ if (!ret) {
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+ ret = __do_mmap(mm, file, addr, len, prot, flags, VM_SEALED,
+ pgoff, &populate, &uf);
+ mmap_write_unlock(mm);
+ userfaultfd_unmap_complete(mm, &uf);
+ /*
+ * Do not mm_populate() against a foreign mm; the target task
+ * will fault pages in on first access.
+ */
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vm_mmap_seal_remote);
+
/*
* Perform a userland memory mapping into the current process address space. See
* the comment for do_mmap() for more details on this operation in general.
diff --git a/mm/vma.c b/mm/vma.c
index d90791b00a7b..fdd14349f719 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2729,11 +2729,10 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
return false;
}
-static unsigned long __mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vma_flags_t vma_flags,
+static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, vma_flags_t vma_flags,
unsigned long pgoff, struct list_head *uf)
{
- struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
bool have_mmap_prepare = file && file->f_op->mmap_prepare;
VMA_ITERATOR(vmi, mm, addr);
@@ -2827,15 +2826,16 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
* Returns: Either an error, or the address at which the requested mapping has
* been performed.
*/
-unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vm_flags_t vm_flags,
- unsigned long pgoff, struct list_head *uf)
+unsigned long mmap_region(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len,
+ vm_flags_t vm_flags, unsigned long pgoff,
+ struct list_head *uf)
{
unsigned long ret;
bool writable_file_mapping = false;
const vma_flags_t vma_flags = legacy_to_vma_flags(vm_flags);
- mmap_assert_write_locked(current->mm);
+ mmap_assert_write_locked(mm);
/* Check to see if MDWE is applicable. */
if (map_deny_write_exec(&vma_flags, &vma_flags))
@@ -2854,13 +2854,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
writable_file_mapping = true;
}
- ret = __mmap_region(file, addr, len, vma_flags, pgoff, uf);
+ ret = __mmap_region(mm, file, addr, len, vma_flags, pgoff, uf);
/* Clear our write mapping regardless of error. */
if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping);
- validate_mm(current->mm);
+ validate_mm(mm);
return ret;
}
diff --git a/mm/vma.h b/mm/vma.h
index 8e4b61a7304c..4f5222ad2e9d 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -459,9 +459,9 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);
-unsigned long mmap_region(struct file *file, unsigned long addr,
- unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
- struct list_head *uf);
+unsigned long mmap_region(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len, vm_flags_t vm_flags,
+ unsigned long pgoff, struct list_head *uf);
int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
unsigned long addr, unsigned long request,
--
2.43.0