[PATCH RFC 4/4] mm: Install marker pte when page out for shmem pages

From: Peter Xu
Date: Fri Aug 06 2021 - 23:25:48 EST


When shmem pages are swapped out, instead of clearing the pte entry, we leave a
marker pte showing that this page is swapped out as a hint for pagemap. A new
TTU flag is introduced to identify this case.

This can be useful for detecting swapped out cold shmem pages. Then after some
memory background scanning work (which will fault in the shmem page and
confuse page reclaim), we can do MADV_PAGEOUT explicitly on this page to swap
it out again as we know it was cold.

For pagemap, we don't need to explicitly set the PM_SWAP bit, because by nature
SWP_PTE_MARKER ptes are already counted as PM_SWAP due to their format as swap
entries.

Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
fs/proc/task_mmu.c | 1 +
include/linux/rmap.h | 1 +
mm/rmap.c | 19 +++++++++++++++++++
mm/vmscan.c | 2 +-
4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index eb97468dfe4c..21b8594abc1d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1384,6 +1384,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ /* NOTE: this covers PTE_MARKER_PAGEOUT too */
flags |= PM_SWAP;
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..318a0e95c7fb 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,6 +95,7 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
+ TTU_HINT_PAGEOUT = 0x100, /* Hint for pageout operation */
};

#ifdef CONFIG_MMU
diff --git a/mm/rmap.c b/mm/rmap.c
index b9eb5c12f3fe..24a70b36b6da 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1384,6 +1384,22 @@ void page_remove_rmap(struct page *page, bool compound)
unlock_page_memcg(page);
}

+static inline void
+pte_marker_install(struct vm_area_struct *vma, pte_t *pte,
+ struct page *page, unsigned long address)
+{ /* Install a PTE_MARKER_PAGEOUT marker pte at @address; empty body without CONFIG_PTE_MARKER_PAGEOUT */
+#ifdef CONFIG_PTE_MARKER_PAGEOUT
+ swp_entry_t entry;
+ pte_t pteval;
+
+ if (vma_is_shmem(vma) && !PageAnon(page) && pte_none(*pte)) { /* only shmem VMA, non-anon page, and a pte that is currently none */
+ entry = make_pte_marker_entry(PTE_MARKER_PAGEOUT);
+ pteval = swp_entry_to_pte(entry); /* marker is stored in swap-entry pte form */
+ set_pte_at(vma->vm_mm, address, pte, pteval); /* written into the VMA's owning mm */
+ }
+#endif
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
@@ -1628,6 +1644,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
*/
dec_mm_counter(mm, mm_counter_file(page));
}
+
+ if (flags & TTU_HINT_PAGEOUT)
+ pte_marker_install(vma, pvmw.pte, page, address);
discard:
/*
* No need to call mmu_notifier_invalidate_range() it has be
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4620df62f0ff..4754af6fa24b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1493,7 +1493,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
* processes. Try to unmap it here.
*/
if (page_mapped(page)) {
- enum ttu_flags flags = TTU_BATCH_FLUSH;
+ enum ttu_flags flags = TTU_BATCH_FLUSH | TTU_HINT_PAGEOUT;
bool was_swapbacked = PageSwapBacked(page);

if (unlikely(PageTransHuge(page)))
--
2.32.0