[RFC PATCH 5/5] mm/mmap: Use split munmap calls for MAP_FIXED
From: Liam R. Howlett
Date: Fri May 31 2024 - 12:34:38 EST
Use vms_gather_munmap_vmas() to prepare the unmapping before the new
area is written to the maple tree, and vms_complete_munmap_vmas() to
execute it afterwards. Delaying the unmapping prevents RCU readers from
seeing a gap in the vmas that logically should never exist.
Gathering the vmas that will be unmapped allows the accounting to be
calculated before checking whether there is enough memory. Using the
page count calculated during vms_gather_munmap_vmas() allows the code
in mmap_region() to be reduced. This removes the only caller of
count_vma_pages_range(), so the function has been dropped.
This does have the side effect that vmas may be split in preparation
for the unmap and the mapping may then still fail may_expand_vm(); the
number of pages covered will not change.
Note that do_vmi_munmap(), which was previously used to unmap, checked
alignment and overflow. These checks were unnecessary, as do_mmap()
already checks these cases and arch_setup_additional_pages() in
arch/mips/kernel/vdso.c uses predefined values that must already pass
these checks.
Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
---
mm/mmap.c | 84 +++++++++++++++++++++++++++----------------------------
1 file changed, 42 insertions(+), 42 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 3e0930c09213..f968181fafd5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -364,23 +364,6 @@ anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
-static unsigned long count_vma_pages_range(struct mm_struct *mm,
- unsigned long addr, unsigned long end)
-{
- VMA_ITERATOR(vmi, mm, addr);
- struct vm_area_struct *vma;
- unsigned long nr_pages = 0;
-
- for_each_vma_range(vmi, vma, end) {
- unsigned long vm_start = max(addr, vma->vm_start);
- unsigned long vm_end = min(end, vma->vm_end);
-
- nr_pages += PHYS_PFN(vm_end - vm_start);
- }
-
- return nr_pages;
-}
-
static void __vma_link_file(struct vm_area_struct *vma,
struct address_space *mapping)
{
@@ -2863,47 +2846,61 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
struct vm_area_struct *next, *prev, *merge;
pgoff_t pglen = len >> PAGE_SHIFT;
unsigned long charged = 0;
+ struct vma_munmap_struct vms;
+ struct ma_state mas_detach;
unsigned long end = addr + len;
unsigned long merge_start = addr, merge_end = end;
bool writable_file_mapping = false;
pgoff_t vm_pgoff;
- int error;
+ int error = -ENOMEM;
VMA_ITERATOR(vmi, mm, addr);
- /* Check against address space limit. */
- if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
- unsigned long nr_pages;
+ vma = vma_find(&vmi, end);
+ if (vma) {
+ struct maple_tree mt_detach;
- /*
- * MAP_FIXED may remove pages of mappings that intersects with
- * requested mapping. Account for the pages it would unmap.
- */
- nr_pages = count_vma_pages_range(mm, addr, end);
+ /* Prevent unmapping a sealed VMA. */
+ if (unlikely(!can_modify_mm(mm, addr, end)))
+ return -EPERM;
- if (!may_expand_vm(mm, vm_flags,
- (len >> PAGE_SHIFT) - nr_pages))
+ mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
+ mt_on_stack(mt_detach);
+ mas_init(&mas_detach, &mt_detach, 0);
+ /* arch_unmap() might do unmaps itself. */
+ arch_unmap(mm, addr, end);
+ init_vma_munmap(&vms, &vmi, vma, addr, end, uf,
+ /* unlock = */ false);
+ /* Prepare to unmap any existing mapping in the area */
+ if (vms_gather_munmap_vmas(&vms, &mas_detach))
return -ENOMEM;
+ next = vms.next;
+ prev = vms.prev;
+ vma = NULL;
+ vma_iter_prev_range(&vmi);
+ } else {
+ vms.end = 0; /* vms.end == 0 indicates there is no MAP_FIXED */
+ vms.nr_pages = 0;
+ next = vma_next(&vmi);
+ prev = vma_prev(&vmi);
}
- /* Unmap any existing mapping in the area */
- error = do_vmi_munmap(&vmi, mm, addr, len, uf, false);
- if (error == -EPERM)
- return error;
- else if (error)
- return -ENOMEM;
-
/*
- * Private writable mapping: check memory availability
+ * Check against address space limit.
+ * MAP_FIXED may remove pages of mappings that intersects with
+ * requested mapping. Account for the pages it would unmap.
*/
+ if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - vms.nr_pages))
+ goto no_mem;
+
+ /* Private writable mapping: check memory availability */
if (accountable_mapping(file, vm_flags)) {
charged = len >> PAGE_SHIFT;
+ charged -= vms.nr_pages; /* MAP_FIXED removed memory */
if (security_vm_enough_memory_mm(mm, charged))
- return -ENOMEM;
+ goto no_mem;
vm_flags |= VM_ACCOUNT;
}
- next = vma_next(&vmi);
- prev = vma_prev(&vmi);
if (vm_flags & VM_SPECIAL) {
if (prev)
vma_iter_next_range(&vmi);
@@ -2950,10 +2947,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* not unmapped, but the maps are removed from the list.
*/
vma = vm_area_alloc(mm);
- if (!vma) {
- error = -ENOMEM;
+ if (!vma)
goto unacct_error;
- }
vma_iter_config(&vmi, addr, end);
vma_set_range(vma, addr, end, pgoff);
@@ -3075,6 +3070,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vm_flags_set(vma, VM_SOFTDIRTY);
vma_set_page_prot(vma);
+ if (vms.end)
+ vms_complete_munmap_vmas(&vms, &mas_detach);
validate_mm(mm);
return addr;
@@ -3100,6 +3097,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
unacct_error:
if (charged)
vm_unacct_memory(charged);
+no_mem:
+ if (vms.end)
+ abort_munmap_vmas(&mas_detach);
validate_mm(mm);
return error;
}
--
2.43.0