[PATCH v3 17/21] clear_huge_page: use non-cached clearing

From: Ankur Arora
Date: Mon Jun 06 2022 - 16:57:10 EST


Non-caching stores are suitable for circumstances where the destination
region is unlikely to be read again soon, or is large enough that
there's no expectation that we will find the data in the cache.

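Add a new parameter, coherent, to clear_user_extent(). When it is false,
clear_user_extent() takes the non-caching clearing path, which is used
for huge and gigantic pages. Also add a non_cached parameter to
clear_huge_page() so that callers can hint that they want this path.

The non-caching path needs a final clear_page_make_coherent() operation
since non-cached clearing typically involves weakly ordered stores that
are incoherent wrt other operations in the memory hierarchy.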

The non-caching path is always used for gigantic pages. For huge pages
it is used only if the extent is larger than an architectural threshold
(as reported by clear_page_prefer_non_caching()), or if the caller gives
an explicit hint (for instance, because this call is part of a larger
clearing operation).

Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
include/linux/mm.h | 3 ++-
mm/huge_memory.c | 3 ++-
mm/hugetlb.c | 3 ++-
mm/memory.c | 50 +++++++++++++++++++++++++++++++++++++++-------
4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5084571b2fb6..a9b0c1889348 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3302,7 +3302,8 @@ enum mf_action_page_type {
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr_hint,
- unsigned int pages_per_huge_page);
+ unsigned int pages_per_huge_page,
+ bool non_cached);
extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned long addr_hint,
struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a77c78a2b6b5..73654db77a1c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -594,6 +594,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
pgtable_t pgtable;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
vm_fault_t ret = 0;
+ bool non_cached = false;

VM_BUG_ON_PAGE(!PageCompound(page), page);

@@ -611,7 +612,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
goto release;
}

- clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
+ clear_huge_page(page, vmf->address, HPAGE_PMD_NR, non_cached);
/*
* The memory barrier inside __SetPageUptodate makes sure that
* clear_huge_page writes become visible before the set_pmd_at()
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c468ac1d069..0c4a31b5c1e9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5481,6 +5481,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
bool new_page, new_pagecache_page = false;
+ bool non_cached = false;

/*
* Currently, we are forced to kill the process in the event the
@@ -5536,7 +5537,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl);
goto out;
}
- clear_huge_page(page, address, pages_per_huge_page(h));
+ clear_huge_page(page, address, pages_per_huge_page(h), non_cached);
__SetPageUptodate(page);
new_page = true;

diff --git a/mm/memory.c b/mm/memory.c
index b78b32a3e915..0638dc56828f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5606,11 +5606,18 @@ bool clear_page_prefer_non_caching(unsigned long extent)
*
* With ARCH_MAX_CLEAR_PAGES == 1, clear_user_highpages() drops down
* to page-at-a-time mode. Or, funnels through to clear_user_pages().
+ *
+ * With coherent == false, we use incoherent stores and the caller is
+ * responsible for making the region coherent again by calling
+ * clear_page_make_coherent().
*/
static void clear_user_extent(struct page *start_page, unsigned long vaddr,
- unsigned int npages)
+ unsigned int npages, bool coherent)
{
- clear_user_highpages(start_page, vaddr, npages);
+ if (coherent)
+ clear_user_highpages(start_page, vaddr, npages);
+ else
+ clear_user_highpages_incoherent(start_page, vaddr, npages);
}

struct subpage_arg {
@@ -5709,6 +5716,13 @@ static void clear_gigantic_page(struct page *page,
{
int i;
struct page *p = page;
+ bool coherent;
+
+ /*
+ * Gigantic pages are large enough, that there are no cache
+ * expectations. Use the incoherent path.
+ */
+ coherent = false;

might_sleep();
for (i = 0; i < pages_per_huge_page;
@@ -5718,9 +5732,16 @@ static void clear_gigantic_page(struct page *page,
* guarantees that p[0] and p[clear_page_unit-1]
* never straddle a mem_map discontiguity.
*/
- clear_user_extent(p, base_addr + i * PAGE_SIZE, clear_page_unit);
+ clear_user_extent(p, base_addr + i * PAGE_SIZE,
+ clear_page_unit, coherent);
cond_resched();
}
+
+ /*
+ * We need to make sure that writes above are ordered before
+ * updating the PTE and marking SetPageUptodate().
+ */
+ clear_page_make_coherent();
}

static void clear_subpages(struct subpage_arg *sa,
@@ -5736,15 +5757,16 @@ static void clear_subpages(struct subpage_arg *sa,

n = min(clear_page_unit, remaining);

- clear_user_extent(page + i, base_addr + i * PAGE_SIZE, n);
+ clear_user_extent(page + i, base_addr + i * PAGE_SIZE,
+ n, true);
i += n;

cond_resched();
}
}

-void clear_huge_page(struct page *page,
- unsigned long addr_hint, unsigned int pages_per_huge_page)
+void clear_huge_page(struct page *page, unsigned long addr_hint,
+ unsigned int pages_per_huge_page, bool non_cached)
{
unsigned long addr = addr_hint &
~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
@@ -5755,7 +5777,21 @@ void clear_huge_page(struct page *page,
.page_unit = clear_page_unit,
};

- if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
+ /*
+ * The non-caching path is typically slower for small extents so use
+ * it only if the caller explicitly hints it or if the extent is
+ * large enough that there are no cache expectations.
+ *
+ * We let the gigantic page path handle the details.
+ */
+ non_cached |=
+ clear_page_prefer_non_caching(pages_per_huge_page * PAGE_SIZE);
+
+ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES || non_cached)) {
+ /*
+ * Gigantic page clearing always uses incoherent clearing
+ * internally.
+ */
clear_gigantic_page(page, addr, pages_per_huge_page);
return;
}
--
2.31.1