[RFC PATCH 1/8] mm/madvise: propagate vma->vm_end changes

From: Nadav Amit
Date: Sun Sep 26 2021 - 19:44:03 EST


From: Nadav Amit <namit@xxxxxxxxxx>

The comment in madvise_dontneed_free() says that vma splits that occur
while the mmap-lock is dropped, during userfaultfd_remove(), should be
handled correctly, but nothing in the code indicates that it is so: prev
is invalidated, and do_madvise() will therefore continue to update VMAs
from the "obsolete" end (i.e., the one before the split).

Propagate the changes to end from madvise_dontneed_free() back to
do_madvise() and continue the updates from the new end accordingly.

Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Colin Cross <ccross@xxxxxxxxxx>
Cc: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx>
Fixes: 70ccb92fdd90 ("userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED")
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
mm/madvise.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 0734db8d53a7..a2b05352ebfe 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -768,10 +768,11 @@ static long madvise_dontneed_single_vma(struct vm_area_struct *vma,

static long madvise_dontneed_free(struct vm_area_struct *vma,
struct vm_area_struct **prev,
- unsigned long start, unsigned long end,
+ unsigned long start, unsigned long *pend,
int behavior)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long end = *pend;

*prev = vma;
if (!can_madv_lru_vma(vma))
@@ -811,6 +812,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
* end-vma->vm_end range, but the manager can
* handle a repetition fine.
*/
end = vma->vm_end;
+ *pend = end;
}
VM_WARN_ON(start >= end);
@@ -980,8 +982,10 @@ static int madvise_inject_error(int behavior,

static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- unsigned long start, unsigned long end, int behavior)
+ unsigned long start, unsigned long *pend, int behavior)
{
+ unsigned long end = *pend;
+
switch (behavior) {
case MADV_REMOVE:
return madvise_remove(vma, prev, start, end);
@@ -993,7 +997,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
return madvise_pageout(vma, prev, start, end);
case MADV_FREE:
case MADV_DONTNEED:
- return madvise_dontneed_free(vma, prev, start, end, behavior);
+ return madvise_dontneed_free(vma, prev, start, pend, behavior);
case MADV_POPULATE_READ:
case MADV_POPULATE_WRITE:
return madvise_populate(vma, prev, start, end, behavior);
@@ -1199,7 +1203,7 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
tmp = end;

/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma(vma, &prev, start, tmp, behavior);
+ error = madvise_vma(vma, &prev, start, &tmp, behavior);
if (error)
goto out;
start = tmp;
--
2.25.1