Re: [PATCH V9 1/4] perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE

From: Peter Zijlstra
Date: Thu Nov 12 2020 - 06:37:28 EST


On Thu, Nov 12, 2020 at 10:53:58AM +0100, Peter Zijlstra wrote:
> You just don't like it because you want me to be purely page-table
> based.

How's something like this then? I failed to untangle Power's many MMUs
though :/

---
arch/arm64/include/asm/pgtable.h | 3 ++
arch/sparc/include/asm/pgtable_64.h | 14 +++++
arch/sparc/mm/hugetlbpage.c | 19 ++++---
include/linux/pgtable.h | 16 ++++++
kernel/events/core.c | 100 +++++++++++++-----------------------
5 files changed, 82 insertions(+), 70 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..1cd2d986b0ca 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -503,6 +503,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
PMD_TYPE_SECT)
#define pmd_leaf(pmd) pmd_sect(pmd)

+#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 7ef6affa105e..1723e18ba89f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1121,6 +1121,20 @@ extern unsigned long cmdline_memory_size;

asmlinkage void do_sparc64_fault(struct pt_regs *regs);

+#ifdef CONFIG_HUGETLB_PAGE
+
+#define pud_leaf_size pud_leaf_size
+extern unsigned long pud_leaf_size(pud_t pud);
+
+#define pmd_leaf_size pmd_leaf_size
+extern unsigned long pmd_leaf_size(pmd_t pmd);
+
+#define pte_leaf_size pte_leaf_size
+extern unsigned long pte_leaf_size(pte_t pte);
+#endif
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
#endif /* !(__ASSEMBLY__) */

#endif /* !(_SPARC64_PGTABLE_H) */
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index ec423b5f17dd..3e806b87ec19 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -247,14 +247,17 @@ static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
return shift;
}

-static unsigned int huge_tte_to_shift(pte_t entry)
+static unsigned long __tte_to_shift(pte_t entry)
{
- unsigned long shift;
-
if (tlb_type == hypervisor)
- shift = sun4v_huge_tte_to_shift(entry);
- else
- shift = sun4u_huge_tte_to_shift(entry);
+ return sun4v_huge_tte_to_shift(entry);
+
+ return sun4u_huge_tte_to_shift(entry);
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+ unsigned long shift = __tte_to_shift(entry);

if (shift == PAGE_SHIFT)
WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
@@ -272,6 +275,10 @@ static unsigned long huge_tte_to_size(pte_t pte)
return size;
}

+unsigned long pud_leaf_size(pud_t pud) { return 1UL << __tte_to_shift((pte_t)pud); }
+unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << __tte_to_shift((pte_t)pmd); }
+unsigned long pte_leaf_size(pte_t pte) { return 1UL << __tte_to_shift((pte_t)pte); }
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 71125a4676c4..35b9da397e55 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1481,4 +1481,20 @@ typedef unsigned int pgtbl_mod_mask;
#define pmd_leaf(x) 0
#endif

+#ifndef pgd_leaf_size(x)
+#define pgd_leaf_size(x) PGD_SIZE
+#endif
+#ifndef p4d_leaf_size(x)
+#define p4d_leaf_size(x) P4D_SIZE
+#endif
+#ifndef pud_leaf_size(x)
+#define pud_leaf_size(x) PUD_SIZE
+#endif
+#ifndef pmd_leaf_size(x)
+#define pmd_leaf_size(x) PMD_SIZE
+#endif
+#ifndef pte_leaf_size(x)
+#define pte_leaf_size(x) PAGE_SIZE
+#endif
+
#endif /* _LINUX_PGTABLE_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d2f3ca792936..fca04fcfc9ea 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7001,90 +7001,62 @@ static u64 perf_virt_to_phys(u64 virt)
return phys_addr;
}

-#ifdef CONFIG_MMU
-
/*
* Return the MMU page size of a given virtual address.
- *
- * This generic implementation handles page-table aligned huge pages, as well
- * as non-page-table aligned hugetlbfs compound pages.
- *
- * If an architecture supports and uses non-page-table aligned pages in their
- * kernel mapping it will need to provide it's own implementation of this
- * function.
*/
-__weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
+static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
{
+ u64 size = 0;
+#ifdef CONFIG_HAVE_FAST_GUP
struct page *page;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
+ pgd_t *pgdp, pgd;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
+
+ pgdp = pgd_offset(mm, addr);
+ pgd = READ_ONCE(*pgdp);
+ if (pgd_none(pgd))
return 0;

- p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
- return 0;
+ if (pgd_leaf(pgd))
+ return pgd_leaf_size(pgd);

- if (p4d_leaf(*p4d))
- return 1ULL << P4D_SHIFT;
-
- pud = pud_offset(p4d, addr);
- if (!pud_present(*pud))
+ p4dp = p4d_offset_lockless(pgdp, pgd, addr);
+ p4d = READ_ONCE(*p4dp);
+ if (!p4d_present(p4d))
return 0;

- if (pud_leaf(*pud)) {
-#ifdef pud_page
- page = pud_page(*pud);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PUD_SHIFT;
- }
+ if (p4d_leaf(p4d))
+ return p4d_leaf_size(p4d);

- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ pudp = pud_offset_lockless(p4dp, p4d, addr);
+ pud = READ_ONCE(*pudp);
+ if (!pud_present(pud))
return 0;

- if (pmd_leaf(*pmd)) {
-#ifdef pmd_page
- page = pmd_page(*pmd);
- if (PageHuge(page))
- return page_size(compound_head(page));
-#endif
- return 1ULL << PMD_SHIFT;
- }
+ if (pud_leaf(pud))
+ return pud_leaf_size(pud);

- pte = pte_offset_map(pmd, addr);
- if (!pte_present(*pte)) {
- pte_unmap(pte);
+ pmdp = pmd_offset_lockless(pudp, pud, addr);
+ pmd = READ_ONCE(*pmdp);
+ if (!pmd_present(pmd))
return 0;
- }
-
- page = pte_page(*pte);
- if (PageHuge(page)) {
- u64 size = page_size(compound_head(page));
- pte_unmap(pte);
- return size;
- }

- pte_unmap(pte);
- return PAGE_SIZE;
-}
+ if (pmd_leaf(pmd))
+ return pmd_leaf_size(pmd);

-#else
+ ptep = pte_offset_map(&pmd, addr);
+ pte = READ_ONCE(*ptep); // gup_get_pte()
+ if (pte_present(pte))
+ size = pte_leaf_size(pte);
+ pte_unmap(ptep);

-static u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
+#endif /* CONFIG_HAVE_FAST_GUP */
+ return size;
}

-#endif
-
static u64 perf_get_page_size(unsigned long addr)
{
struct mm_struct *mm;