[PATCH 10/14] mm: Set up vma iterator for vma_iter_prealloc() calls

From: Liam R. Howlett
Date: Wed May 31 2023 - 22:17:41 EST


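Set the correct limits for vma_iter_prealloc() calls so that the maple
tree can be smarter about how many nodes are needed.

vma_iter_prealloc() now takes a VMA argument, which is passed on to
mas_preallocate() in place of the previous NULL.  Callers set the
iterator range (via vma_iter_config() or __mas_set_range()) before
preallocating, and vma_iter_clear() no longer takes start/end
arguments since the range is already set by the time it is called.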

Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
---
fs/exec.c | 1 +
mm/internal.h | 18 +++++-------
mm/mmap.c | 81 +++++++++++++++++++++++++++++++--------------------
mm/nommu.c | 33 +++++++++------------
4 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 25c65b64544b..dc0ba74ebb74 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -697,6 +697,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
if (vma != vma_next(&vmi))
return -EFAULT;

+ vma_iter_prev_range(&vmi);
/*
* cover the whole range: [new_start, old_end)
*/
diff --git a/mm/internal.h b/mm/internal.h
index d78fd0fafa3b..531b2e95146c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1045,23 +1045,21 @@ static inline void vma_iter_config(struct vma_iterator *vmi,
/*
* VMA Iterator functions shared between nommu and mmap
*/
-static inline int vma_iter_prealloc(struct vma_iterator *vmi)
+static inline int vma_iter_prealloc(struct vma_iterator *vmi,
+ struct vm_area_struct *vma)
{
- return mas_preallocate(&vmi->mas, NULL, GFP_KERNEL);
+ return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
}

-static inline void vma_iter_clear(struct vma_iterator *vmi,
- unsigned long start, unsigned long end)
+static inline void vma_iter_clear(struct vma_iterator *vmi)
{
- mas_set_range(&vmi->mas, start, end - 1);
mas_store_prealloc(&vmi->mas, NULL);
}

static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
unsigned long start, unsigned long end, gfp_t gfp)
{
- vmi->mas.index = start;
- vmi->mas.last = end - 1;
+ __mas_set_range(&vmi->mas, start, end - 1);
mas_store_gfp(&vmi->mas, NULL, gfp);
if (unlikely(mas_is_err(&vmi->mas)))
return -ENOMEM;
@@ -1098,8 +1096,7 @@ static inline void vma_iter_store(struct vma_iterator *vmi,
((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
vma_iter_invalidate(vmi);

- vmi->mas.index = vma->vm_start;
- vmi->mas.last = vma->vm_end - 1;
+ __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
mas_store_prealloc(&vmi->mas, vma);
}

@@ -1110,8 +1107,7 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
vma_iter_invalidate(vmi);

- vmi->mas.index = vma->vm_start;
- vmi->mas.last = vma->vm_end - 1;
+ __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
mas_store_gfp(&vmi->mas, vma, gfp);
if (unlikely(mas_is_err(&vmi->mas)))
return -ENOMEM;
diff --git a/mm/mmap.c b/mm/mmap.c
index 22c71dff762b..eaebcc8f60d2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -411,7 +411,8 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
VMA_ITERATOR(vmi, mm, 0);
struct address_space *mapping = NULL;

- if (vma_iter_prealloc(&vmi))
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;

if (vma->vm_file) {
@@ -664,19 +665,16 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Only handles expanding */
VM_WARN_ON(vma->vm_start < start || vma->vm_end > end);

- if (vma_iter_prealloc(vmi))
+ /* Note: vma iterator must be pointing to 'start' */
+ vma_iter_config(vmi, start, end);
+ if (vma_iter_prealloc(vmi, vma))
goto nomem;

vma_prepare(&vp);
vma_adjust_trans_huge(vma, start, end, 0);
- /* VMA iterator points to previous, so set to start if necessary */
- if (vma_iter_addr(vmi) != start)
- vma_iter_set(vmi, start);
-
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
- /* Note: mas must be pointing to the expanding VMA */
vma_iter_store(vmi, vma);

vma_complete(&vp, vmi, vma->vm_mm);
@@ -703,19 +701,19 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,

WARN_ON((vma->vm_start != start) && (vma->vm_end != end));

- if (vma_iter_prealloc(vmi))
+ if (vma->vm_start < start)
+ vma_iter_config(vmi, vma->vm_start, start);
+ else
+ vma_iter_config(vmi, end, vma->vm_end);
+
+ if (vma_iter_prealloc(vmi, NULL))
return -ENOMEM;

init_vma_prep(&vp, vma);
vma_prepare(&vp);
vma_adjust_trans_huge(vma, start, end, 0);

- if (vma->vm_start < start)
- vma_iter_clear(vmi, vma->vm_start, start);
-
- if (vma->vm_end > end)
- vma_iter_clear(vmi, end, vma->vm_end);
-
+ vma_iter_clear(vmi);
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
@@ -991,7 +989,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
if (err)
return NULL;

- if (vma_iter_prealloc(vmi))
+ if (vma_start < vma->vm_start || vma_end > vma->vm_end)
+ vma_expanded = true;
+
+ if (vma_expanded) {
+ vma_iter_config(vmi, vma_start, vma_end);
+ } else {
+ vma_iter_config(vmi, adjust->vm_start + adj_start,
+ adjust->vm_end);
+ }
+
+ if (vma_iter_prealloc(vmi, vma))
return NULL;

init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
@@ -1000,8 +1008,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,

vma_prepare(&vp);
vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start);
- if (vma_start < vma->vm_start || vma_end > vma->vm_end)
- vma_expanded = true;

vma->vm_start = vma_start;
vma->vm_end = vma_end;
@@ -1945,7 +1951,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
struct vm_area_struct *next;
unsigned long gap_addr;
int error = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ MA_STATE(mas, &mm->mm_mt, vma->vm_start, address);

if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
@@ -1970,6 +1976,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/* Check that both stack segments have the same anon_vma? */
}

+ if (next)
+ mas_prev_range(&mas, address);
+
+ __mas_set_range(&mas, vma->vm_start, address - 1);
if (mas_preallocate(&mas, vma, GFP_KERNEL))
return -ENOMEM;

@@ -2013,7 +2023,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
/* Overwrite old entry in mtree. */
- mas_set_range(&mas, vma->vm_start, address - 1);
mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2052,6 +2061,10 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
return -ENOMEM;
}

+ if (prev)
+ mas_next_range(&mas, vma->vm_start);
+
+ __mas_set_range(&mas, address, vma->vm_end - 1);
if (mas_preallocate(&mas, vma, GFP_KERNEL))
return -ENOMEM;

@@ -2096,7 +2109,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
vma->vm_start = address;
vma->vm_pgoff -= grow;
/* Overwrite old entry in mtree. */
- mas_set_range(&mas, address, vma->vm_end - 1);
mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2255,10 +2267,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!new)
return -ENOMEM;

- err = -ENOMEM;
- if (vma_iter_prealloc(vmi))
- goto out_free_vma;
-
if (new_below) {
new->vm_end = addr;
} else {
@@ -2266,6 +2274,11 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
}

+ err = -ENOMEM;
+ vma_iter_config(vmi, new->vm_start, new->vm_end);
+ if (vma_iter_prealloc(vmi, new))
+ goto out_free_vma;
+
err = vma_dup_policy(vma, new);
if (err)
goto out_free_vmi;
@@ -2600,8 +2613,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,

next = vma_next(&vmi);
prev = vma_prev(&vmi);
- if (vm_flags & VM_SPECIAL)
+ if (vm_flags & VM_SPECIAL) {
+ if (prev)
+ vma_iter_next_range(&vmi);
+
goto cannot_expand;
+ }

/* Attempt to expand an old mapping */
/* Check next */
@@ -2611,6 +2628,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
merge_end = next->vm_end;
vma = next;
vm_pgoff = next->vm_pgoff - pglen;
+
}

/* Check prev */
@@ -2622,9 +2640,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
merge_start = prev->vm_start;
vma = prev;
vm_pgoff = prev->vm_pgoff;
+ } else if (prev) {
+ vma_iter_next_range(&vmi);
}

-
/* Actually expand, if possible */
if (vma &&
!vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {
@@ -2633,9 +2652,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}

cannot_expand:
- if (prev)
- vma_iter_next_range(&vmi);
-
/*
* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
@@ -2721,7 +2737,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
goto close_and_free_vma;

error = -ENOMEM;
- if (vma_iter_prealloc(&vmi))
+ if (vma_iter_prealloc(&vmi, vma))
goto close_and_free_vma;

if (vma->vm_file)
@@ -2994,7 +3010,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (vma && vma->vm_end == addr && !vma_policy(vma) &&
can_vma_merge_after(vma, flags, NULL, NULL,
addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
- if (vma_iter_prealloc(vmi))
+ vma_iter_config(vmi, vma->vm_start, addr + len);
+ if (vma_iter_prealloc(vmi, vma))
goto unacct_fail;

init_vma_prep(&vp, vma);
@@ -3009,6 +3026,8 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto out;
}

+ if (vma)
+ vma_iter_next_range(vmi);
/* create a vma struct for an anonymous mapping */
vma = vm_area_alloc(mm);
if (!vma)
diff --git a/mm/nommu.c b/mm/nommu.c
index a764b86b132a..a96b889cc17e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -583,7 +583,8 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
{
VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_start);

- if (vma_iter_prealloc(&vmi)) {
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma)) {
pr_warn("Allocation of vma tree for process %d failed\n",
current->pid);
return -ENOMEM;
@@ -591,7 +592,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
cleanup_vma_from_mm(vma);

/* remove from the MM's tree and list */
- vma_iter_clear(&vmi, vma->vm_start, vma->vm_end);
+ vma_iter_clear(&vmi);
return 0;
}
/*
@@ -1041,9 +1042,6 @@ unsigned long do_mmap(struct file *file,
if (!vma)
goto error_getting_vma;

- if (vma_iter_prealloc(&vmi))
- goto error_vma_iter_prealloc;
-
region->vm_usage = 1;
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
@@ -1185,6 +1183,10 @@ unsigned long do_mmap(struct file *file,

share:
BUG_ON(!vma->vm_region);
+ vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
+ goto error_just_free;
+
setup_vma_to_mm(vma, current->mm);
current->mm->map_count++;
/* add the VMA to the tree */
@@ -1231,14 +1233,6 @@ unsigned long do_mmap(struct file *file,
len, current->pid);
show_free_areas(0, NULL);
return -ENOMEM;
-
-error_vma_iter_prealloc:
- kmem_cache_free(vm_region_jar, region);
- vm_area_free(vma);
- pr_warn("Allocation of vma tree for process %d failed\n", current->pid);
- show_free_areas(0, NULL);
- return -ENOMEM;
-
}

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
@@ -1323,12 +1317,6 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!new)
goto err_vma_dup;

- if (vma_iter_prealloc(vmi)) {
- pr_warn("Allocation of vma tree for process %d failed\n",
- current->pid);
- goto err_vmi_preallocate;
- }
-
/* most fields are the same, copy all, and then fixup */
*region = *vma->vm_region;
new->vm_region = region;
@@ -1342,6 +1330,13 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
region->vm_pgoff = new->vm_pgoff += npages;
}

+ vma_iter_config(vmi, new->vm_start, new->vm_end);
+ if (vma_iter_prealloc(vmi, vma)) {
+ pr_warn("Allocation of vma tree for process %d failed\n",
+ current->pid);
+ goto err_vmi_preallocate;
+ }
+
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);

--
2.39.2