[PATCH RFC 11/12] mm/gup: Handle hugepd for follow_page()

From: Peter Xu
Date: Wed Nov 15 2023 - 20:30:03 EST


Hugepd is only used in PowerPC's hugetlbfs. follow_page_mask() used to
leverage hugetlb APIs to access hugepd entries. Teach follow_page_mask()
itself to handle hugepd.

With the previous refactoring of the fast-gup gup_huge_pd(), most of the code
can be easily reused. Since follow_page() only ever fetches one page, setting
the end to "address + PAGE_SIZE" should suffice.

Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
mm/gup.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 69 insertions(+), 8 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 7c210206470f..e635278f65f9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -25,6 +25,11 @@

#include "internal.h"

+/* Forward declaration: the definition sits next to gup_huge_pd() below. */
+static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
+	unsigned long addr, unsigned int pdshift,
+	unsigned int flags, struct follow_page_context *ctx);
+
static inline void sanity_check_pinned_pages(struct page **pages,
unsigned long npages)
{
@@ -713,6 +718,9 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return page;
}
+ if (unlikely(is_hugepd(__hugepd(pmd_val(pmdval)))))
+ return follow_hugepd(vma, __hugepd(pmd_val(pmdval)),
+ address, PMD_SHIFT, flags, ctx);
if (likely(!pmd_thp_or_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

@@ -764,6 +772,10 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
if (unlikely(pud_bad(pud)))
return no_page_table(vma, flags, address);

+ if (unlikely(is_hugepd(__hugepd(pud_val(pud)))))
+ return follow_hugepd(vma, __hugepd(pud_val(pud)),
+ address, PUD_SHIFT, flags, ctx);
+
return follow_pmd_mask(vma, address, pudp, flags, ctx);
}

@@ -772,15 +784,19 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
unsigned int flags,
struct follow_page_context *ctx)
{
- p4d_t *p4d;
+ p4d_t *p4d, p4dval;

p4d = p4d_offset(pgdp, address);
- if (p4d_none(*p4d))
- return no_page_table(vma, flags, address);
- BUILD_BUG_ON(p4d_huge(*p4d));
- if (unlikely(p4d_bad(*p4d)))
+ p4dval = *p4d;
+ BUILD_BUG_ON(p4d_huge(p4dval));
+
+ if (p4d_none(p4dval) || unlikely(p4d_bad(p4dval)))
return no_page_table(vma, flags, address);

+ if (unlikely(is_hugepd(__hugepd(p4d_val(p4dval)))))
+ return follow_hugepd(vma, __hugepd(p4d_val(p4dval)),
+ address, P4D_SHIFT, flags, ctx);
+
return follow_pud_mask(vma, address, p4d, flags, ctx);
}

@@ -812,7 +828,7 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
struct follow_page_context *ctx)
{
- pgd_t *pgd;
+ pgd_t *pgd, pgdval;
struct mm_struct *mm = vma->vm_mm;

ctx->page_mask = 0;
@@ -827,11 +843,17 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
&ctx->page_mask);

pgd = pgd_offset(mm, address);
+ pgdval = *pgd;

if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return no_page_table(vma, flags, address);
+ page = no_page_table(vma, flags, address);
+ else if (unlikely(is_hugepd(__hugepd(pgd_val(pgdval)))))
+ page = follow_hugepd(vma, __hugepd(pgd_val(pgdval)),
+ address, PGDIR_SHIFT, flags, ctx);
+ else
+ page = follow_p4d_mask(vma, address, pgd, flags, ctx);

- return follow_p4d_mask(vma, address, pgd, flags, ctx);
+ return page;
}

struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
@@ -2850,6 +2872,37 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,

return 1;
}
+
+/*
+ * Follow the single page at @addr through a hugepd entry. Returns the page,
+ * NULL if gup_huge_pd() fails, or ERR_PTR(-EFAULT) if @vma is not hugetlb.
+ */
+static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
+	unsigned long addr, unsigned int pdshift,
+	unsigned int flags, struct follow_page_context *ctx)
+{
+	struct page *page;
+	struct hstate *h;
+	spinlock_t *ptl;
+	int nr = 0, ret;
+	pte_t *ptep;
+
+	/* Only hugetlb supports hugepd */
+	if (WARN_ON_ONCE(!is_vm_hugetlb_page(vma)))
+		return ERR_PTR(-EFAULT);
+
+	h = hstate_vma(vma);
+	ptep = hugepte_offset(hugepd, addr, pdshift);
+	ptl = huge_pte_lock(h, vma->vm_mm, ptep);
+	ret = gup_huge_pd(hugepd, addr, pdshift, addr + PAGE_SIZE,
+			  flags, &page, &nr);
+	spin_unlock(ptl);
+	if (!ret)
+		return NULL;
+	WARN_ON_ONCE(nr != 1);
+	ctx->page_mask = (1U << huge_page_order(h)) - 1;
+	return page;
+}
#else
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
unsigned int pdshift, unsigned long end, unsigned int flags,
@@ -2857,6 +2910,14 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
{
return 0;
}
+
+/* Stub for !CONFIG_ARCH_HAS_HUGEPD builds: always returns NULL. */
+static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
+	unsigned long addr, unsigned int pdshift,
+	unsigned int flags, struct follow_page_context *ctx)
+{
+	return NULL;
+}
#endif /* CONFIG_ARCH_HAS_HUGEPD */

static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
--
2.41.0