[PATCH] userfaultfd: Fix regression in userfaultfd_unmap_prep()

From: Liam R. Howlett
Date: Wed May 31 2023 - 21:54:54 EST


Android reported a regression in the userfaultfd unmap path. A closer
inspection on the userfaultfd_unmap_prep() change showed that a second
tree walk would be necessary in the reworked code.

Fix the regression by passing each VMA that will be unmapped through to
the userfaultfd_unmap_prep() function as they are added to the unmap
list, instead of re-walking the tree for the VMA.

Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs")
Reported-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Suggested-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
Signed-off-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
---
fs/userfaultfd.c | 35 +++++++++++++++--------------------
include/linux/userfaultfd_k.h | 6 +++---
mm/mmap.c | 31 +++++++++++++++----------------
3 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 4e800bb7d2ab..a2a42a02848f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -857,31 +857,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
return false;
}

-int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct list_head *unmaps)
{
- VMA_ITERATOR(vmi, mm, start);
- struct vm_area_struct *vma;
-
- for_each_vma_range(vmi, vma, end) {
- struct userfaultfd_unmap_ctx *unmap_ctx;
- struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

- if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
- has_unmap_ctx(ctx, unmaps, start, end))
- continue;
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+ has_unmap_ctx(ctx, unmaps, start, end))
+ return 0;

- unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
- if (!unmap_ctx)
- return -ENOMEM;
+ unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+ if (!unmap_ctx)
+ return -ENOMEM;

- userfaultfd_ctx_get(ctx);
- atomic_inc(&ctx->mmap_changing);
- unmap_ctx->ctx = ctx;
- unmap_ctx->start = start;
- unmap_ctx->end = end;
- list_add_tail(&unmap_ctx->list, unmaps);
- }
+ userfaultfd_ctx_get(ctx);
+ atomic_inc(&ctx->mmap_changing);
+ unmap_ctx->ctx = ctx;
+ unmap_ctx->start = start;
+ unmap_ctx->end = end;
+ list_add_tail(&unmap_ctx->list, unmaps);

return 0;
}
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index d78b01524349..ac7b0c96d351 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -188,8 +188,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);

-extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct list_head *uf);
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
@@ -271,7 +271,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma,
return true;
}

-static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf)
{
diff --git a/mm/mmap.c b/mm/mmap.c
index 99dc60f64816..1503a7bdb192 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2420,6 +2420,21 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto munmap_sidetree_failed;

count++;
+ if (unlikely(uf)) {
+ /*
+ * If userfaultfd_unmap_prep returns an error the vmas
+ * will remain split, but userland will get a
+ * highly unexpected error anyway. This is no
+ * different than the case where the first of the two
+ * __split_vma fails, but we don't undo the first
+ * split, despite we could. This is unlikely enough
+ * failure that it's not worth optimizing it for.
+ */
+ error = userfaultfd_unmap_prep(next, start, end, uf);
+
+ if (error)
+ goto userfaultfd_error;
+ }
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
BUG_ON(next->vm_start < start);
BUG_ON(next->vm_start > end);
@@ -2432,22 +2447,6 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!next)
next = vma_next(vmi);

- if (unlikely(uf)) {
- /*
- * If userfaultfd_unmap_prep returns an error the vmas
- * will remain split, but userland will get a
- * highly unexpected error anyway. This is no
- * different than the case where the first of the two
- * __split_vma fails, but we don't undo the first
- * split, despite we could. This is unlikely enough
- * failure that it's not worth optimizing it for.
- */
- error = userfaultfd_unmap_prep(mm, start, end, uf);
-
- if (error)
- goto userfaultfd_error;
- }
-
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
/* Make sure no VMAs are about to be lost. */
{
--
2.39.2