[PATCH] MIPS: Add 48-bit VA space (and 4-level page tables) for 4K pages.

From: David Daney
Date: Thu Feb 16 2017 - 20:27:57 EST


From: Alex Belits <alex.belits@xxxxxxxxxx>

Some users must have 4K pages while needing a 48-bit VA space size.
The cleanest way to do this is to go to a 4-level page table for this
case. Each page table level using order-0 pages adds 9 bits to the
VA size (at 4K pages), so for four levels we get 9 * 4 + 12 == 48 bits.

For the 4K page size case only, we add support functions for the PUD
level of the page table tree, and the TLB exception handlers get an
extra level of tree walk.

Signed-off-by: Alex Belits <alex.belits@xxxxxxxxxx>
[david.daney@xxxxxxxxxx] Forward port to v4.10
Signed-off-by: David Daney <david.daney@xxxxxxxxxx>
---
arch/mips/Kconfig | 13 +++---
arch/mips/include/asm/pgalloc.h | 26 +++++++++++
arch/mips/include/asm/pgtable-64.h | 88 +++++++++++++++++++++++++++++++++++---
arch/mips/mm/init.c | 3 ++
arch/mips/mm/pgtable-64.c | 33 ++++++++++++--
arch/mips/mm/tlbex.c | 22 ++++++++++
6 files changed, 172 insertions(+), 13 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b3c5bde..cd83512 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2113,10 +2113,13 @@ config MIPS_VA_BITS_48
bool "48 bits virtual memory"
depends on 64BIT
help
- Support a maximum at least 48 bits of application virtual memory.
- Default is 40 bits or less, depending on the CPU.
- This option result in a small memory overhead for page tables.
- This option is only supported with 16k and 64k page sizes.
+ Support a maximum of at least 48 bits of application virtual
+ memory. Default is 40 bits or less, depending on the CPU.
+ For page sizes 16k and above, this option results in a small
+ memory overhead for page tables. For 4k page size, a fourth
+ level of page tables is added which imposes both a memory
+ overhead and slower TLB fault handling.
+
If unsure, say N.

choice
@@ -2126,7 +2129,6 @@ choice
config PAGE_SIZE_4KB
bool "4kB"
depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
- depends on !MIPS_VA_BITS_48
help
This option select the standard 4kB Linux page size. On some
R3000-family processors this is the only available page size. Using
@@ -2977,6 +2979,7 @@ config HAVE_LATENCYTOP_SUPPORT

config PGTABLE_LEVELS
int
+ default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
default 3 if 64BIT && !PAGE_SIZE_64KB
default 2

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index a03e869..4fb20ff 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -124,6 +124,32 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)

#endif

+#ifndef __PAGETABLE_PUD_FOLDED
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ pud_t *pud;
+
+ pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+ if (pud)
+ pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+ return pud;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ free_pages((unsigned long)pud, PUD_ORDER);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ set_pgd(pgd, __pgd((unsigned long)pud));
+}
+
+#define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x)
+
+#endif /* __PAGETABLE_PUD_FOLDED */
+
#define check_pgt_cache() do { } while (0)

extern void pagetable_init(void);
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 514cbc0..a19d17d 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -19,7 +19,7 @@

#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48)
#include <asm-generic/pgtable-nopmd.h>
-#else
+#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48))
#include <asm-generic/pgtable-nopud.h>
#endif

@@ -53,9 +53,18 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))

+# ifdef __PAGETABLE_PUD_FOLDED
+# define PGDIR_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+# endif
+#endif

-#define PGDIR_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PUD_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PGDIR_SHIFT (PUD_SHIFT + (PAGE_SHIFT + PUD_ORDER - 3))
#endif
+
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))

@@ -78,8 +87,13 @@
* of virtual address space.
*/
#ifdef CONFIG_PAGE_SIZE_4KB
-#define PGD_ORDER 1
-#define PUD_ORDER aieeee_attempt_to_allocate_pud
+# ifdef CONFIG_MIPS_VA_BITS_48
+# define PGD_ORDER 0
+# define PUD_ORDER 0
+# else
+# define PGD_ORDER 1
+# define PUD_ORDER aieeee_attempt_to_allocate_pud
+# endif
#define PMD_ORDER 0
#define PTE_ORDER 0
#endif
@@ -117,6 +131,9 @@
#endif

#define PTRS_PER_PGD ((PAGE_SIZE << PGD_ORDER) / sizeof(pgd_t))
+#ifndef __PAGETABLE_PUD_FOLDED
+#define PTRS_PER_PUD ((PAGE_SIZE << PUD_ORDER) / sizeof(pud_t))
+#endif
#ifndef __PAGETABLE_PMD_FOLDED
#define PTRS_PER_PMD ((PAGE_SIZE << PMD_ORDER) / sizeof(pmd_t))
#endif
@@ -133,7 +150,7 @@
#define VMALLOC_START (MAP_BASE + (2 * PAGE_SIZE))
#define VMALLOC_END \
(MAP_BASE + \
- min(PTRS_PER_PGD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, \
(1UL << cpu_vmbits)) - (1UL << 32))

#if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
@@ -149,12 +166,72 @@
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
#endif
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#endif
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))

extern pte_t invalid_pte_table[PTRS_PER_PTE];
extern pte_t empty_bad_page_table[PTRS_PER_PTE];

+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * For 4-level pagetables we define these ourselves; for 3-level the
+ * definitions are below; for 2-level the
+ * definitions are supplied by <asm-generic/pgtable-nopmd.h>.
+ */
+typedef struct { unsigned long pud; } pud_t;
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+
+extern pud_t invalid_pud_table[PTRS_PER_PUD];
+
+/*
+ * Empty pgd entries point to the invalid_pud_table.
+ */
+static inline int pgd_none(pgd_t pgd)
+{
+ return pgd_val(pgd) == (unsigned long)invalid_pud_table;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ if (unlikely(pgd_val(pgd) & ~PAGE_MASK))
+ return 1;
+
+ return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+ return pgd_val(pgd) != (unsigned long)invalid_pud_table;
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ pgd_val(*pgdp) = (unsigned long)invalid_pud_table;
+}
+
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+ return pgd_val(pgd);
+}
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+ return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+}
+
+static inline void set_pgd(pgd_t *pgd, pgd_t pgdval)
+{
+ *pgd = pgdval;
+}
+
+#endif

#ifndef __PAGETABLE_PMD_FOLDED
/*
@@ -280,6 +357,7 @@ static inline pmd_t *pmd_offset(pud_t * pud, unsigned long address)
* Initialize a new pgd / pmd table with invalid pointers.
*/
extern void pgd_init(unsigned long page);
+extern void pud_init(unsigned long page, unsigned long pagetable);
extern void pmd_init(unsigned long page, unsigned long pagetable);

/*
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index e86ebcf..b1000b8 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -536,6 +536,9 @@ unsigned long pgd_current[NR_CPUS];
* it in the linker script.
*/
pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+#endif
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
#endif
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index ce4473e..335e3c9 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -18,10 +18,12 @@ void pgd_init(unsigned long page)
unsigned long *p, *end;
unsigned long entry;

-#ifdef __PAGETABLE_PMD_FOLDED
- entry = (unsigned long)invalid_pte_table;
-#else
+#if !defined(__PAGETABLE_PUD_FOLDED)
+ entry = (unsigned long)invalid_pud_table;
+#elif !defined(__PAGETABLE_PMD_FOLDED)
entry = (unsigned long)invalid_pmd_table;
+#else
+ entry = (unsigned long)invalid_pte_table;
#endif

p = (unsigned long *) page;
@@ -62,6 +64,28 @@ void pmd_init(unsigned long addr, unsigned long pagetable)
}
#endif

+#ifndef __PAGETABLE_PUD_FOLDED
+void pud_init(unsigned long addr, unsigned long pagetable)
+{
+ unsigned long *p, *end;
+
+ p = (unsigned long *)addr;
+ end = p + PTRS_PER_PUD;
+
+ do {
+ p[0] = pagetable;
+ p[1] = pagetable;
+ p[2] = pagetable;
+ p[3] = pagetable;
+ p[4] = pagetable;
+ p += 8;
+ p[-3] = pagetable;
+ p[-2] = pagetable;
+ p[-1] = pagetable;
+ } while (p != end);
+}
+#endif
+
pmd_t mk_pmd(struct page *page, pgprot_t prot)
{
pmd_t pmd;
@@ -85,6 +109,9 @@ void __init pagetable_init(void)

/* Initialize the entire pgd. */
pgd_init((unsigned long)swapper_pg_dir);
+#ifndef __PAGETABLE_PUD_FOLDED
+ pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+#endif
#ifndef __PAGETABLE_PMD_FOLDED
pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
#endif
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 55ce396..be30ed4 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -851,6 +851,13 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,

uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+#ifndef __PAGETABLE_PUD_FOLDED
+ uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */
+ uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */
+ uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3);
+ uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */
+#endif
#ifndef __PAGETABLE_PMD_FOLDED
uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
@@ -1167,6 +1174,21 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
}

+#ifndef __PAGETABLE_PUD_FOLDED
+ /* get pud offset in bytes */
+ uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3);
+ uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3);
+
+ if (use_lwx_insns()) {
+ UASM_i_LWX(p, ptr, scratch, ptr);
+ } else {
+ uasm_i_daddu(p, ptr, ptr, scratch); /* add in pud offset */
+ UASM_i_LW(p, ptr, 0, ptr);
+ }
+ /* ptr contains a pointer to PMD entry */
+ /* tmp contains the address */
+#endif
+
#ifndef __PAGETABLE_PMD_FOLDED
/* get pmd offset in bytes */
uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
--
2.9.3