Re: [PATCH] ia64: add support for folded p4d page tables

From: Michal Hocko
Date: Wed Dec 18 2019 - 08:59:45 EST


On Wed 18-12-19 11:38:20, Mike Rapoport wrote:
> From: Mike Rapoport <rppt@xxxxxxxxxxxxx>
>
> Implement primitives necessary for the 4th level folding, add walks of the
> p4d level where appropriate, remove usage of __ARCH_USE_5LEVEL_HACK and
> replace 5level-fixup.h with pgtable-nop4d.h.

Why do we need this? I thought IA64 was essentially a dead
architecture. Is this fixing something?

> Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx>
> ---
> arch/ia64/include/asm/pgalloc.h | 4 ++--
> arch/ia64/include/asm/pgtable.h | 17 ++++++++---------
> arch/ia64/mm/fault.c | 7 ++++++-
> arch/ia64/mm/hugetlbpage.c | 18 ++++++++++++------
> arch/ia64/mm/init.c | 28 ++++++++++++++++++++++++----
> 5 files changed, 52 insertions(+), 22 deletions(-)
>
> diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
> index f4c491044882..2a3050345099 100644
> --- a/arch/ia64/include/asm/pgalloc.h
> +++ b/arch/ia64/include/asm/pgalloc.h
> @@ -36,9 +36,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
>
> #if CONFIG_PGTABLE_LEVELS == 4
> static inline void
> -pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
> +p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud)
> {
> - pgd_val(*pgd_entry) = __pa(pud);
> + p4d_val(*p4d_entry) = __pa(pud);
> }
>
> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
> diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
> index d602e7c622db..c87f789bc914 100644
> --- a/arch/ia64/include/asm/pgtable.h
> +++ b/arch/ia64/include/asm/pgtable.h
> @@ -283,12 +283,12 @@ extern unsigned long VMALLOC_END;
> #define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET))
>
> #if CONFIG_PGTABLE_LEVELS == 4
> -#define pgd_none(pgd) (!pgd_val(pgd))
> -#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd)))
> -#define pgd_present(pgd) (pgd_val(pgd) != 0UL)
> -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
> -#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
> -#define pgd_page(pgd) virt_to_page((pgd_val(pgd) + PAGE_OFFSET))
> +#define p4d_none(p4d) (!p4d_val(p4d))
> +#define p4d_bad(p4d) (!ia64_phys_addr_valid(p4d_val(p4d)))
> +#define p4d_present(p4d) (p4d_val(p4d) != 0UL)
> +#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL)
> +#define p4d_page_vaddr(p4d) ((unsigned long) __va(p4d_val(p4d) & _PFN_MASK))
> +#define p4d_page(p4d) virt_to_page((p4d_val(p4d) + PAGE_OFFSET))
> #endif
>
> /*
> @@ -388,7 +388,7 @@ pgd_offset (const struct mm_struct *mm, unsigned long address)
> #if CONFIG_PGTABLE_LEVELS == 4
> /* Find an entry in the second-level page table.. */
> #define pud_offset(dir,addr) \
> - ((pud_t *) pgd_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
> + ((pud_t *) p4d_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
> #endif
>
> /* Find an entry in the third-level page table.. */
> @@ -582,10 +582,9 @@ extern struct page *zero_page_memmap_ptr;
>
>
> #if CONFIG_PGTABLE_LEVELS == 3
> -#define __ARCH_USE_5LEVEL_HACK
> #include <asm-generic/pgtable-nopud.h>
> #endif
> -#include <asm-generic/5level-fixup.h>
> +#include <asm-generic/pgtable-nop4d.h>
> #include <asm-generic/pgtable.h>
>
> #endif /* _ASM_IA64_PGTABLE_H */
> diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
> index c2f299fe9e04..ec994135cb74 100644
> --- a/arch/ia64/mm/fault.c
> +++ b/arch/ia64/mm/fault.c
> @@ -29,6 +29,7 @@ static int
> mapped_kernel_page_is_present (unsigned long address)
> {
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *ptep, pte;
> @@ -37,7 +38,11 @@ mapped_kernel_page_is_present (unsigned long address)
> if (pgd_none(*pgd) || pgd_bad(*pgd))
> return 0;
>
> - pud = pud_offset(pgd, address);
> + p4d = p4d_offset(pgd, address);
> + if (p4d_none(*p4d) || p4d_bad(*p4d))
> + return 0;
> +
> + pud = pud_offset(p4d, address);
> if (pud_none(*pud) || pud_bad(*pud))
> return 0;
>
> diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
> index d16e419fd712..32352a73df0c 100644
> --- a/arch/ia64/mm/hugetlbpage.c
> +++ b/arch/ia64/mm/hugetlbpage.c
> @@ -30,12 +30,14 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
> {
> unsigned long taddr = htlbpage_to_page(addr);
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *pte = NULL;
>
> pgd = pgd_offset(mm, taddr);
> - pud = pud_alloc(mm, pgd, taddr);
> + p4d = p4d_offset(pgd, taddr);
> + pud = pud_alloc(mm, p4d, taddr);
> if (pud) {
> pmd = pmd_alloc(mm, pud, taddr);
> if (pmd)
> @@ -49,17 +51,21 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
> {
> unsigned long taddr = htlbpage_to_page(addr);
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *pte = NULL;
>
> pgd = pgd_offset(mm, taddr);
> if (pgd_present(*pgd)) {
> - pud = pud_offset(pgd, taddr);
> - if (pud_present(*pud)) {
> - pmd = pmd_offset(pud, taddr);
> - if (pmd_present(*pmd))
> - pte = pte_offset_map(pmd, taddr);
> + p4d = p4d_offset(pgd, addr);
> + if (p4d_present(*p4d)) {
> + pud = pud_offset(p4d, taddr);
> + if (pud_present(*pud)) {
> + pmd = pmd_offset(pud, taddr);
> + if (pmd_present(*pmd))
> + pte = pte_offset_map(pmd, taddr);
> + }
> }
> }
>
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index 58fd67068bac..bcdc78e97e6e 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -208,6 +208,7 @@ static struct page * __init
> put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
> {
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *pte;
> @@ -215,7 +216,10 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
> pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
>
> {
> - pud = pud_alloc(&init_mm, pgd, address);
> + p4d = p4d_alloc(&init_mm, pgd, address);
> + if (!p4d)
> + goto out;
> + pud = pud_alloc(&init_mm, p4d, address);
> if (!pud)
> goto out;
> pmd = pmd_alloc(&init_mm, pud, address);
> @@ -382,6 +386,7 @@ int vmemmap_find_next_valid_pfn(int node, int i)
>
> do {
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *pte;
> @@ -392,7 +397,13 @@ int vmemmap_find_next_valid_pfn(int node, int i)
> continue;
> }
>
> - pud = pud_offset(pgd, end_address);
> + p4d = p4d_offset(pgd, end_address);
> + if (p4d_none(*p4d)) {
> + end_address += P4D_SIZE;
> + continue;
> + }
> +
> + pud = pud_offset(p4d, end_address);
> if (pud_none(*pud)) {
> end_address += PUD_SIZE;
> continue;
> @@ -430,6 +441,7 @@ int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
> struct page *map_start, *map_end;
> int node;
> pgd_t *pgd;
> + p4d_t *p4d;
> pud_t *pud;
> pmd_t *pmd;
> pte_t *pte;
> @@ -444,12 +456,20 @@ int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
> for (address = start_page; address < end_page; address += PAGE_SIZE) {
> pgd = pgd_offset_k(address);
> if (pgd_none(*pgd)) {
> + p4d = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
> + if (!p4d)
> + goto err_alloc;
> + pgd_populate(&init_mm, pgd, p4d);
> + }
> + p4d = p4d_offset(pgd, address);
> +
> + if (p4d_none(*p4d)) {
> pud = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
> if (!pud)
> goto err_alloc;
> - pgd_populate(&init_mm, pgd, pud);
> + p4d_populate(&init_mm, p4d, pud);
> }
> - pud = pud_offset(pgd, address);
> + pud = pud_offset(p4d, address);
>
> if (pud_none(*pud)) {
> pmd = memblock_alloc_node(PAGE_SIZE, PAGE_SIZE, node);
> --
> 2.24.0
>

--
Michal Hocko
SUSE Labs