Re: [PATCH V4 01/13] perf/core, x86: Add PERF_SAMPLE_DATA_PAGE_SIZE
From: Peter Zijlstra
Date: Fri Feb 01 2019 - 07:44:05 EST
On Fri, Feb 01, 2019 at 01:36:00PM +0300, Kirill A. Shutemov wrote:
> On Fri, Feb 01, 2019 at 11:03:58AM +0100, Peter Zijlstra wrote:
> > Will just mentioned a lovely feature where some archs have multi entry
> > large pages.
> >
> > Possibly something like:
> >
> > if (pud_large(*pud)) {
> > struct page *page = pud_page(*pud);
> > int order = PUD_SHIFT;
> >
> > if (PageHuge(page)) {
> > page = compound_head(page);
> > order += compound_order(page);
> > }
> >
> > return 1ULL << order;
> > }
> >
> > works correctly.
>
> For more fun: some compound pages can be mapped with page table entries
> not matching their compound size, i.e. 2M pages mapped with PTE.
Surely not for PageHuge() ?! I thought the point of hugetlbfs was to
guarantee page granularity.
How is the below?
static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd))
return 0;
p4d = p4d_offset(pgd, addr);
if (!p4d_present(*p4d))
return 0;
if (p4d_large(*p4d)) {
struct page *page = p4d_page(*p4d);
int shift = P4D_SHIFT;
if (PageHuge(page)) {
page = compound_head(page);
shift = PAGE_SHIFT + compound_order(page);
}
return 1ULL << shift;
}
if (!p4d_present(*p4d))
return 0;
pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return 0;
if (pud_large(*pud)) {
struct page *page = pud_page(*pud);
int shift = P4D_SHIFT;
if (PageHuge(page)) {
page = compound_head(page);
shift = PAGE_SHIFT + compound_order(page);
}
return 1ULL << shift;
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd))
return 0;
if (pmd_large(*pmd)) {
struct page *page = pud_page(*pud);
int shift = P4D_SHIFT;
if (PageHuge(page)) {
page = compound_head(page);
shift = PAGE_SHIFT + compound_order(page);
}
return 1ULL << shift;
}
pte = pte_offset_map(pmd, addr);
if (!pte_present(*pte)) {
pte_unmap(pte);
return 0;
}
pte_unmap(pte);
return PAGE_SIZE;
}