[PATCH 1/3] mm: enable MADV_DONTNEED for hugetlb mappings

From: Mike Kravetz
Date: Fri Jan 28 2022 - 17:26:46 EST


MADV_DONTNEED is currently disabled for hugetlb mappings. This
certainly makes sense for shared file mappings: the page cache holds a
reference to the page, so the page would never be freed. However, it
could be useful to unmap and free pages in private mappings.

The only thing preventing MADV_DONTNEED from working on hugetlb mappings
is a check in can_madv_lru_vma(). To support hugetlb mappings, create
and use a new routine, madvise_dontneed_free_valid_vma(), that accepts
hugetlb vmas, but only for MADV_DONTNEED. Also, in the DONTNEED case,
round start down and end up to the huge page size for hugetlb vmas
before calling zap_page_range(): madvise() only requires PAGE_SIZE
alignment, but the hugetlb unmap routine requires huge page size alignment.

Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
---
mm/madvise.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 5604064df464..01b4d145d8f0 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -796,10 +796,30 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
+ /*
+ * hugetlb unmap needs huge page aligned start/size, so widen the range.
+ * NOTE(review): this may zap bytes outside the caller's [start, end).
+ */
+ if (vma->vm_flags & VM_HUGETLB) {
+ struct hstate *h = hstate_vma(vma);
+
+ start = ALIGN_DOWN(start, huge_page_size(h));
+ end = ALIGN(end, huge_page_size(h));
+ }
+
zap_page_range(vma, start, end - start);
return 0;
}

+static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
+					    int behavior)
+{
+	if (!(vma->vm_flags & VM_HUGETLB))
+		return can_madv_lru_vma(vma);
+	/* hugetlb vmas: only MADV_DONTNEED is accepted here. */
+	return behavior == MADV_DONTNEED;
+}
+
static long madvise_dontneed_free(struct vm_area_struct *vma,
struct vm_area_struct **prev,
unsigned long start, unsigned long end,
@@ -808,7 +828,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;

*prev = vma;
- if (!can_madv_lru_vma(vma))
+ if (!madvise_dontneed_free_valid_vma(vma, behavior))
return -EINVAL;

if (!userfaultfd_remove(vma, start, end)) {
@@ -830,7 +850,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
*/
return -ENOMEM;
}
- if (!can_madv_lru_vma(vma))
+ if (!madvise_dontneed_free_valid_vma(vma, behavior))
return -EINVAL;
if (end > vma->vm_end) {
/*
--
2.34.1