[PATCH] mm/khugepaged: skip shmem with armed userfaultfd

From: David Stevens
Date: Tue Jan 31 2023 - 22:42:46 EST


From: David Stevens <stevensd@xxxxxxxxxxxx>

Collapsing memory in a vma that has an armed userfaultfd results in
zero-filling any missing pages, which breaks user-space paging for those
filled pages. Avoid khugepage bypassing userfaultfd by not collapsing
pages in shmem reached via scanning a vma with an armed userfaultfd if
doing so would zero-fill any pages.

Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx>
---
mm/khugepaged.c | 35 ++++++++++++++++++++++++-----------
1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79be13133322..48e944fb8972 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1736,8 +1736,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* + restore gaps in the page cache;
* + unlock and free huge page;
*/
-static int collapse_file(struct mm_struct *mm, unsigned long addr,
- struct file *file, pgoff_t start,
+static int collapse_file(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, struct file *file, pgoff_t start,
struct collapse_control *cc)
{
struct address_space *mapping = file->f_mapping;
@@ -1784,6 +1784,9 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
* be able to map it or use it in another way until we unlock it.
*/

+ if (is_shmem)
+ mmap_read_lock(mm);
+
xas_set(&xas, start);
for (index = start; index < end; index++) {
struct page *page = xas_next(&xas);
@@ -1792,6 +1795,10 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
VM_BUG_ON(index != xas.xa_index);
if (is_shmem) {
if (!page) {
+ if (userfaultfd_armed(vma)) {
+ result = SCAN_EXCEED_NONE_PTE;
+ goto xa_locked;
+ }
/*
* Stop if extent has been truncated or
* hole-punched, and is now completely
@@ -2095,6 +2102,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
hpage->mapping = NULL;
}

+ if (is_shmem)
+ mmap_read_unlock(mm);
if (hpage)
unlock_page(hpage);
out:
@@ -2108,8 +2117,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
return result;
}

-static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
- struct file *file, pgoff_t start,
+static int hpage_collapse_scan_file(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, struct file *file, pgoff_t start,
struct collapse_control *cc)
{
struct page *page = NULL;
@@ -2118,6 +2127,9 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
int present, swap;
int node = NUMA_NO_NODE;
int result = SCAN_SUCCEED;
+ bool uffd_was_armed = userfaultfd_armed(vma);
+
+ mmap_read_unlock(mm);

present = 0;
swap = 0;
@@ -2193,13 +2205,16 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
}
rcu_read_unlock();

+ if (uffd_was_armed && present < HPAGE_PMD_NR)
+ result = SCAN_EXCEED_SWAP_PTE;
+
if (result == SCAN_SUCCEED) {
if (cc->is_khugepaged &&
present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
- result = collapse_file(mm, addr, file, start, cc);
+ result = collapse_file(mm, vma, addr, file, start, cc);
}
}

@@ -2207,8 +2222,8 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
return result;
}
#else
-static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
- struct file *file, pgoff_t start,
+static int hpage_collapse_scan_file(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, struct file *file, pgoff_t start,
struct collapse_control *cc)
{
BUILD_BUG();
@@ -2304,8 +2319,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
pgoff_t pgoff = linear_page_index(vma,
khugepaged_scan.address);

- mmap_read_unlock(mm);
- *result = hpage_collapse_scan_file(mm,
+ *result = hpage_collapse_scan_file(mm, vma,
khugepaged_scan.address,
file, pgoff, cc);
mmap_locked = false;
@@ -2656,9 +2670,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
struct file *file = get_file(vma->vm_file);
pgoff_t pgoff = linear_page_index(vma, addr);

- mmap_read_unlock(mm);
mmap_locked = false;
- result = hpage_collapse_scan_file(mm, addr, file, pgoff,
+ result = hpage_collapse_scan_file(mm, vma, addr, file, pgoff,
cc);
fput(file);
} else {
--
2.39.1.456.gfc5497dd1b-goog