This patch brings back mlocked THP. Instead of forbidding mlocked pages
altogether, we just avoid mlocking PTE-mapped THPs and munlock THPs on
split_huge_pmd().
This means PTE-mapped THPs will be on normal lru lists and will be
split under memory pressure by vmscan. After the split vmscan will
detect unevictable small pages and mlock them.
This way we can void leaking mlocked pages into non-VM_LOCKED VMAs.
Not-Yet-Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
I'm not yet 100% certain that this approch is correct. Review would be appriciated.
More testing is required.
---
mm/gup.c | 6 ++++--
mm/huge_memory.c | 33 +++++++++++++++++++++++-------
mm/memory.c | 3 +--
mm/mlock.c | 61 +++++++++++++++++++++++++++++++++++++-------------------
4 files changed, 71 insertions(+), 32 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 70d65e4015a4..e95b0cb6ed81 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -143,6 +143,10 @@ retry:
mark_page_accessed(page);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+ /* Do not mlock pte-mapped THP */
+ if (PageTransCompound(page))
+ goto out;
+
/*
* The preliminary mapping check is mainly to avoid the
* pointless overhead of lock_page on the ZERO_PAGE
@@ -920,8 +924,6 @@ long populate_vma_page_range(struct vm_area_struct *vma,
gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
if (vma->vm_flags & VM_LOCKONFAULT)
gup_flags &= ~FOLL_POPULATE;
- if (vma->vm_flags & VM_LOCKED)
- gup_flags |= FOLL_SPLIT;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2cc99f9096a8..d714de02473b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -846,8 +846,6 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
return VM_FAULT_FALLBACK;
- if (vma->vm_flags & VM_LOCKED)
- return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
@@ -1316,7 +1314,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
update_mmu_cache_pmd(vma, addr, pmd);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
- if (page->mapping && trylock_page(page)) {
+ /*
+ * We don't mlock() pte-mapped THPs. This way we can avoid
+ * leaking mlocked pages into non-VM_LOCKED VMAs.
+ * In most cases the pmd is the only mapping of the page: we
+ * break COW for the mlock(). The only scenario when we have
+ * the page shared here is if we mlocking read-only mapping
+ * shared over fork(). We skip mlocking such pages.