[PATCH v3 5/5] mm: Add ZAP_FLAG_SKIP_SWAP and zap_flags

From: Peter Xu
Date: Wed Sep 08 2021 - 12:36:39 EST


Firstly, the comment in zap_pte_range() is misleading: it talks about
details->check_mapping, while the code only checks whether the details pointer
is non-NULL, so the comment does not match what the code actually does.

Meanwhile, there is no explicit reason why passing in the details pointer
should mean skipping all swap entries.  A new user of zap_details could easily
miss this fact unless they read all the way down to zap_pte_range(), because
zap_details itself carries no comment about it at all, so swap entries could be
erroneously skipped without anyone noticing.
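
As a sketch of the hazard (hypothetical caller, not taken from this series): a
new user that only wants to restrict the zap to one mapping might write

    struct zap_details details = { .zap_mapping = mapping };

but because zap_pte_range() currently tests the pointer itself
("if (unlikely(details))"), merely passing &details down also makes it skip
every swap entry, which the caller never asked for.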

This partly reverts 3e8715fdc03e ("mm: drop zap_details::check_swap_entries"),
but introduces the ZAP_FLAG_SKIP_SWAP flag, whose meaning is the opposite of
the old implicit "details" behavior: the caller must set it explicitly to skip
swap entries; otherwise swap entries are always considered (which should still
be the major case here).
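
From the caller's point of view the new rule is roughly (sketch only, the
field values are illustrative):

    /* Default: swap entries are zapped together with present ptes. */
    struct zap_details details = { .zap_mapping = mapping };

    /* Opt in explicitly where skipping swap entries is really wanted. */
    struct zap_details skip_swap = {
            .zap_mapping = mapping,
            .zap_flags = ZAP_FLAG_SKIP_SWAP,
    };

A NULL details pointer keeps the default as well, since zap_skip_swap(NULL)
returns false.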

We may want to look into when exactly ZAP_FLAG_SKIP_SWAP is needed and apply
it consistently; e.g., currently even if ZAP_FLAG_SKIP_SWAP is set we will
still look into swap pmds no matter what.  But that should be a separate
effort from this patch.

The flag introduced in this patch is also a preparation for more bits to be
defined in the future, e.g., a new bit indicating whether to persist the
upcoming uffd-wp bit in page table entries.
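
For illustration only (the second name below is made up, not part of this
patch), such an extension could look like:

    /* Bit added by this patch */
    #define ZAP_FLAG_SKIP_SWAP      ((__force zap_flags_t) BIT(0))
    /* Hypothetical future bit: keep the uffd-wp bit when zapping */
    #define ZAP_FLAG_KEEP_UFFD_WP   ((__force zap_flags_t) BIT(1))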

Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
 include/linux/mm.h | 16 ++++++++++++++++
 mm/memory.c        |  6 +++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ed44f31615d9..beb784ce35b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1717,12 +1717,18 @@ static inline bool can_do_mlock(void) { return false; }
 extern int user_shm_lock(size_t, struct ucounts *);
 extern void user_shm_unlock(size_t, struct ucounts *);
 
+typedef unsigned int __bitwise zap_flags_t;
+
+/* Whether to skip zapping swap entries */
+#define ZAP_FLAG_SKIP_SWAP ((__force zap_flags_t) BIT(0))
+
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
 	struct address_space *zap_mapping;	/* Check page->mapping if set */
 	struct page *single_page;		/* Locked page to be unmapped */
+	zap_flags_t zap_flags;			/* Extra flags for zapping */
 };
 
 /*
@@ -1739,6 +1745,16 @@ zap_skip_check_mapping(struct zap_details *details, struct page *page)
 		(details->zap_mapping != page_rmapping(page));
 }
 
+/* Return true if skip swap entries, false otherwise */
+static inline bool
+zap_skip_swap(struct zap_details *details)
+{
+	if (!details)
+		return false;
+
+	return details->zap_flags & ZAP_FLAG_SKIP_SWAP;
+}
+
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			    pte_t pte);
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index e5ee8399d270..26e37bef1888 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1379,8 +1379,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		}
 
-		/* If details->check_mapping, we leave swap entries. */
-		if (unlikely(details))
+		if (unlikely(zap_skip_swap(details)))
 			continue;
 
 		if (!non_swap_entry(entry))
@@ -3353,6 +3352,7 @@ void unmap_mapping_page(struct page *page)
 
 	details.zap_mapping = mapping;
 	details.single_page = page;
+	details.zap_flags = ZAP_FLAG_SKIP_SWAP;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
@@ -3377,7 +3377,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 		pgoff_t nr, bool even_cows)
 {
 	pgoff_t first_index = start, last_index = start + nr - 1;
-	struct zap_details details = { };
+	struct zap_details details = { .zap_flags = ZAP_FLAG_SKIP_SWAP };
 
 	details.zap_mapping = even_cows ? NULL : mapping;
 	if (last_index < first_index)
--
2.31.1