Re: [PATCH v2 3/5] RISC-V: Allow booting kernel from any 4KB aligned address

From: Mike Rapoport
Date: Sat Mar 23 2019 - 11:40:56 EST


On Thu, Mar 21, 2019 at 09:47:51AM +0000, Anup Patel wrote:
> Currently, we have to boot RISCV64 kernel from a 2MB aligned physical
> address and RISCV32 kernel from a 4MB aligned physical address. This
> constraint is because initial pagetable setup (i.e. setup_vm()) maps
> entire RAM using hugepages (i.e. 2MB for 3-level pagetable and 4MB for
> 2-level pagetable).
>
> Further, the above booting constraint also results in memory wastage
> because if we boot kernel from some <xyz> address (which is not same as
> RAM start address) then RISCV kernel will map PAGE_OFFSET virtual address
> linearly to <xyz> physical address and memory between RAM start and <xyz>
> will be reserved/unusable.
>
> For example, RISCV64 kernel booted from 0x80200000 will waste 2MB of RAM
> and RISCV32 kernel booted from 0x80400000 will waste 4MB of RAM.
>
> This patch re-writes the initial pagetable setup code to allow booting
> RISCV32 and RISCV64 kernel from any 4KB (i.e. PAGE_SIZE) aligned address.
>
> To achieve this:
> 1. We add kconfig option BOOT_PAGE_ALIGNED. When it is enabled we use
> 4KB mappings in initial page table setup otherwise we use 2MB/4MB
> mappings.
> 2. We map kernel and dtb (few MBs) in setup_vm() (called from head.S)
> 3. Once we reach paging_init() (called from setup_arch()) after
> memblock setup, we map all available memory banks.
>
> With this patch in-place, the booting constraint for RISCV32 and RISCV64
> kernel is much more relaxed when CONFIG_BOOT_PAGE_ALIGNED=y and we can
> now boot kernel very close to RAM start thereby minimizing memory wastage.

I have no general objection, but I presume the patch would be significantly
simplified if the addition of 4K page support followed the removal of
the trampoline_pg_dir.

That said, I didn't look into the details, since they will change
substantially; I only have some comments on the Kconfig part.

At a high level, have you considered using large pages in setup_vm() and
then remapping everything with 4K pages in setup_vm_final()? This might
save you the whole ops-> churn.

> Signed-off-by: Anup Patel <anup.patel@xxxxxxx>
> ---
> arch/riscv/Kconfig | 11 +
> arch/riscv/include/asm/fixmap.h | 5 +
> arch/riscv/include/asm/pgtable-64.h | 5 +
> arch/riscv/include/asm/pgtable.h | 6 +-
> arch/riscv/kernel/head.S | 1 +
> arch/riscv/kernel/setup.c | 4 +-
> arch/riscv/mm/init.c | 402 ++++++++++++++++++++++++----
> 7 files changed, 378 insertions(+), 56 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index eb56c82d8aa1..1b0c66f7aba3 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -172,6 +172,17 @@ config SMP
>
> If you don't know what to do here, say N.
>
> +config BOOT_PAGE_ALIGNED
> + bool "Allow booting from page aligned address"

default no, please

> + help
> + This enables support for booting kernel from any page aligned
> + address (i.e. 4KB aligned). This option is particularly useful
> + on systems with very less RAM (few MBs) because using it we

^ small

> + can boot kernel closer RAM start thereby reducing unusable RAM
> + below kernel.
> +
> + If you don't know what to do here, say N.
> +
> config NR_CPUS
> int "Maximum number of CPUs (2-32)"
> range 2 32
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index 57afe604b495..5cf53dd882e5 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -21,6 +21,11 @@
> */
> enum fixed_addresses {
> FIX_HOLE,
> +#define FIX_FDT_SIZE SZ_1M
> + FIX_FDT_END,
> + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
> + FIX_PTE,
> + FIX_PMD,
> FIX_EARLYCON_MEM_BASE,
> __end_of_fixed_addresses
> };
> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index 7aa0ea9bd8bb..56ecc3dc939d 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -78,6 +78,11 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
> return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
> }
>
> +static inline unsigned long _pmd_pfn(pmd_t pmd)
> +{
> + return pmd_val(pmd) >> _PAGE_PFN_SHIFT;
> +}
> +
> #define pmd_ERROR(e) \
> pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
>
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 1141364d990e..05fa2115e736 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -121,12 +121,16 @@ static inline void pmd_clear(pmd_t *pmdp)
> set_pmd(pmdp, __pmd(0));
> }
>
> -
> static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
> {
> return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
> }
>
> +static inline unsigned long _pgd_pfn(pgd_t pgd)
> +{
> + return pgd_val(pgd) >> _PAGE_PFN_SHIFT;
> +}
> +
> #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
>
> /* Locate an entry in the page global directory */
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 7966262b4f9d..12a3ec5eb8ab 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -63,6 +63,7 @@ clear_bss_done:
> /* Initialize page tables and relocate to virtual addresses */
> la sp, init_thread_union + THREAD_SIZE
> la a0, _start
> + mv a1, s1
> call setup_vm
> call relocate
>
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index ecb654f6a79e..acdd0f74982b 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -30,6 +30,7 @@
> #include <linux/sched/task.h>
> #include <linux/swiotlb.h>
>
> +#include <asm/fixmap.h>
> #include <asm/setup.h>
> #include <asm/sections.h>
> #include <asm/pgtable.h>
> @@ -62,7 +63,8 @@ unsigned long boot_cpu_hartid;
>
> void __init parse_dtb(unsigned int hartid, void *dtb)
> {
> - if (early_init_dt_scan(__va(dtb)))
> + dtb = (void *)fix_to_virt(FIX_FDT) + ((uintptr_t)dtb & ~PAGE_MASK);
> + if (early_init_dt_scan(dtb))
> return;
>
> pr_err("No DTB passed to the kernel\n");
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index e38f8195e45b..c389fbfeccd8 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -1,14 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> /*
> + * Copyright (C) 2019 Western Digital Corporation or its affiliates.
> * Copyright (C) 2012 Regents of the University of California
> - *
> - * This program is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU General Public License
> - * as published by the Free Software Foundation, version 2.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> - * GNU General Public License for more details.
> */
>
> #include <linux/init.h>
> @@ -43,13 +36,6 @@ void setup_zero_page(void)
> memset((void *)empty_zero_page, 0, PAGE_SIZE);
> }
>
> -void __init paging_init(void)
> -{
> - setup_zero_page();
> - local_flush_tlb_all();
> - zone_sizes_init();
> -}
> -
> void __init mem_init(void)
> {
> #ifdef CONFIG_FLATMEM
> @@ -143,18 +129,36 @@ void __init setup_bootmem(void)
> }
> }
>
> +#define MAX_EARLY_MAPPING_SIZE SZ_128M
> +
> pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
> pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>
> #ifndef __PAGETABLE_PMD_FOLDED
> -#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT)
> -pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss;
> -pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
> +#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
> +#define NUM_SWAPPER_PMDS 1UL
> +#else
> +#define NUM_SWAPPER_PMDS (MAX_EARLY_MAPPING_SIZE/PGDIR_SIZE)
> +#endif
> +#define NUM_TRAMPOLINE_PMDS 1UL
> +pmd_t swapper_pmd[PTRS_PER_PMD*NUM_SWAPPER_PMDS] __page_aligned_bss;
> +pmd_t trampoline_pmd[PTRS_PER_PMD*NUM_TRAMPOLINE_PMDS]
> + __initdata __aligned(PAGE_SIZE);
> pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
> +#define NUM_SWAPPER_PTES (MAX_EARLY_MAPPING_SIZE/PMD_SIZE)
> +#else
> +#define NUM_SWAPPER_PTES (MAX_EARLY_MAPPING_SIZE/PGDIR_SIZE)
> #endif
>
> +#define NUM_TRAMPOLINE_PTES 1UL
> +
> +pte_t swapper_pte[PTRS_PER_PTE*NUM_SWAPPER_PTES] __page_aligned_bss;
> +pte_t trampoline_pte[PTRS_PER_PTE*NUM_TRAMPOLINE_PTES]
> + __initdata __aligned(PAGE_SIZE);
> pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
>
> +uintptr_t map_size;
> +
> void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
> {
> unsigned long addr = __fix_to_virt(idx);
> @@ -172,6 +176,13 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
> }
> }
>
> +struct mapping_ops {
> + pte_t *(*get_pte_virt)(phys_addr_t pa);
> + phys_addr_t (*alloc_pte)(uintptr_t va, uintptr_t load_pa);
> + pmd_t *(*get_pmd_virt)(phys_addr_t pa);
> + phys_addr_t (*alloc_pmd)(uintptr_t va, uintptr_t load_pa);
> +};
> +
> static inline void *__load_addr(void *ptr, uintptr_t load_pa)
> {
> extern char _start;
> @@ -186,64 +197,347 @@ static inline void *__load_addr(void *ptr, uintptr_t load_pa)
> #define __load_va(ptr, load_pa) __load_addr(ptr, load_pa)
> #define __load_pa(ptr, load_pa) ((uintptr_t)__load_addr(ptr, load_pa))
>
> -asmlinkage void __init setup_vm(uintptr_t load_pa)
> +static phys_addr_t __init final_alloc_pgtable(void)
> +{
> + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> +}
> +
> +static pte_t *__init early_get_pte_virt(phys_addr_t pa)
> +{
> + return (pte_t *)((uintptr_t)pa);
> +}
> +
> +static pte_t *__init final_get_pte_virt(phys_addr_t pa)
> +{
> + clear_fixmap(FIX_PTE);
> +
> + return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
> +}
> +
> +static phys_addr_t __init early_alloc_trampoline_pte(uintptr_t va,
> + uintptr_t load_pa)
> +{
> + pte_t *base = __load_va(trampoline_pte, load_pa);
> + uintptr_t pte_num = ((va - PAGE_OFFSET) >> PMD_SHIFT);
> +
> + BUG_ON(pte_num >= NUM_TRAMPOLINE_PTES);
> +
> + return (uintptr_t)&base[pte_num * PTRS_PER_PTE];
> +}
> +
> +static phys_addr_t __init early_alloc_swapper_pte(uintptr_t va,
> + uintptr_t load_pa)
> +{
> + pte_t *base = __load_va(swapper_pte, load_pa);
> + uintptr_t pte_num = ((va - PAGE_OFFSET) >> PMD_SHIFT);
> +
> + BUG_ON(pte_num >= NUM_SWAPPER_PTES);
> +
> + return (uintptr_t)&base[pte_num * PTRS_PER_PTE];
> +}
> +
> +static phys_addr_t __init final_alloc_pte(uintptr_t va, uintptr_t load_pa)
> +{
> + return final_alloc_pgtable();
> +}
> +
> +static void __init create_pte_mapping(pte_t *ptep,
> + uintptr_t va, phys_addr_t pa,
> + phys_addr_t sz, pgprot_t prot)
> {
> - uintptr_t i;
> + uintptr_t pte_index = pte_index(va);
> +
> + BUG_ON(sz != PAGE_SIZE);
> +
> + if (pte_none(ptep[pte_index]))
> + ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot);
> +}
> +
> #ifndef __PAGETABLE_PMD_FOLDED
> +static pmd_t *__init early_get_pmd_virt(phys_addr_t pa)
> +{
> + return (pmd_t *)((uintptr_t)pa);
> +}
> +
> +static pmd_t *__init final_get_pmd_virt(phys_addr_t pa)
> +{
> + clear_fixmap(FIX_PMD);
> +
> + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
> +}
> +
> +static phys_addr_t __init early_alloc_trampoline_pmd(uintptr_t va,
> + uintptr_t load_pa)
> +{
> + pmd_t *base = __load_va(trampoline_pmd, load_pa);
> + uintptr_t pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
> +
> + BUG_ON(pmd_num >= NUM_TRAMPOLINE_PMDS);
> +
> + return (uintptr_t)&base[pmd_num * PTRS_PER_PMD];
> +}
> +
> +static phys_addr_t __init early_alloc_swapper_pmd(uintptr_t va,
> + uintptr_t load_pa)
> +{
> + pmd_t *base = __load_va(swapper_pmd, load_pa);
> + uintptr_t pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
> +
> + BUG_ON(pmd_num >= NUM_SWAPPER_PMDS);
> +
> + return (uintptr_t)&base[pmd_num * PTRS_PER_PMD];
> +}
> +
> +static phys_addr_t __init final_alloc_pmd(uintptr_t va, uintptr_t load_pa)
> +{
> + return final_alloc_pgtable();
> +}
> +
> +static void __init create_pmd_mapping(pmd_t *pmdp,
> + uintptr_t va, phys_addr_t pa,
> + phys_addr_t sz, pgprot_t prot,
> + uintptr_t ops_load_pa,
> + struct mapping_ops *ops)
> +{
> + pte_t *ptep;
> + phys_addr_t pte_phys;
> + uintptr_t pmd_index = pmd_index(va);
> +
> + if (sz == PMD_SIZE) {
> + if (pmd_none(pmdp[pmd_index]))
> + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot);
> + return;
> + }
> +
> + if (pmd_none(pmdp[pmd_index])) {
> + pte_phys = ops->alloc_pte(va, ops_load_pa);
> + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys),
> + __pgprot(_PAGE_TABLE));
> + ptep = ops->get_pte_virt(pte_phys);
> + memset(ptep, 0, PAGE_SIZE);
> + } else {
> + pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index]));
> + ptep = ops->get_pte_virt(pte_phys);
> + }
> +
> + create_pte_mapping(ptep, va, pa, sz, prot);
> +}
> +
> +static void __init create_pgd_mapping(pgd_t *pgdp,
> + uintptr_t va, phys_addr_t pa,
> + phys_addr_t sz, pgprot_t prot,
> + uintptr_t ops_load_pa,
> + struct mapping_ops *ops)
> +{
> pmd_t *pmdp;
> + phys_addr_t pmd_phys;
> + uintptr_t pgd_index = pgd_index(va);
> +
> + if (sz == PGDIR_SIZE) {
> + if (pgd_val(pgdp[pgd_index]) == 0)
> + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
> + return;
> + }
> +
> + if (pgd_val(pgdp[pgd_index]) == 0) {
> + pmd_phys = ops->alloc_pmd(va, ops_load_pa);
> + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pmd_phys),
> + __pgprot(_PAGE_TABLE));
> + pmdp = ops->get_pmd_virt(pmd_phys);
> + memset(pmdp, 0, PAGE_SIZE);
> + } else {
> + pmd_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index]));
> + pmdp = ops->get_pmd_virt(pmd_phys);
> + }
> +
> + create_pmd_mapping(pmdp, va, pa, sz, prot, ops_load_pa, ops);
> +}
> +#else
> +static void __init create_pgd_mapping(pgd_t *pgdp,
> + uintptr_t va, phys_addr_t pa,
> + phys_addr_t sz, pgprot_t prot,
> + uintptr_t ops_load_pa,
> + struct mapping_ops *ops)
> +{
> + pte_t *ptep;
> + phys_addr_t pte_phys;
> + uintptr_t pgd_index = pgd_index(va);
> +
> + if (sz == PGDIR_SIZE) {
> + if (pgd_val(pgdp[pgd_index]) == 0)
> + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
> + return;
> + }
> +
> + if (pgd_val(pgdp[pgd_index]) == 0) {
> + pte_phys = ops->alloc_pte(va, ops_load_pa);
> + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pte_phys),
> + __pgprot(_PAGE_TABLE));
> + ptep = ops->get_pte_virt(pte_phys);
> + memset(ptep, 0, PAGE_SIZE);
> + } else {
> + pte_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index]));
> + ptep = ops->get_pte_virt(pte_phys);
> + }
> +
> + create_pte_mapping(ptep, va, pa, sz, prot);
> +}
> +#endif
> +
> +static uintptr_t __init best_map_size(uintptr_t load_pa, phys_addr_t size)
> +{
> +#ifdef CONFIG_BOOT_PAGE_ALIGNED
> + uintptr_t map_sz = PAGE_SIZE;
> +#else
> +#ifndef __PAGETABLE_PMD_FOLDED
> + uintptr_t map_sz = PMD_SIZE;
> +#else
> + uintptr_t map_sz = PGDIR_SIZE;
> +#endif
> #endif
> - pgd_t *pgdp;
> +
> +#ifndef __PAGETABLE_PMD_FOLDED
> + if (!(load_pa & (PMD_SIZE - 1)) &&
> + (size >= PMD_SIZE) &&
> + (map_sz < PMD_SIZE))
> + map_sz = PMD_SIZE;
> +#endif
> +
> + if (!(load_pa & (PGDIR_SIZE - 1)) &&
> + (size >= PGDIR_SIZE) &&
> + (map_sz < PGDIR_SIZE))
> + map_sz = PGDIR_SIZE;
> +
> + return map_sz;
> +}
> +
> +asmlinkage void __init setup_vm(uintptr_t load_pa, uintptr_t dtb_pa)
> +{
> phys_addr_t map_pa;
> + uintptr_t va, end_va;
> + uintptr_t load_sz = __load_pa(&_end, load_pa) - load_pa;
> pgprot_t tableprot = __pgprot(_PAGE_TABLE);
> pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC);
> + struct mapping_ops tramp_ops, swap_ops;
>
> va_pa_offset = PAGE_OFFSET - load_pa;
> pfn_base = PFN_DOWN(load_pa);
> + map_size = best_map_size(load_pa, PGDIR_SIZE);
>
> /* Sanity check alignment and size */
> BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
> - BUG_ON((load_pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0);
> + BUG_ON((load_pa % map_size) != 0);
> + BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
>
> -#ifndef __PAGETABLE_PMD_FOLDED
> - pgdp = __load_va(trampoline_pg_dir, load_pa);
> - map_pa = __load_pa(trampoline_pmd, load_pa);
> - pgdp[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
> - pfn_pgd(PFN_DOWN(map_pa), tableprot);
> - trampoline_pmd[0] = pfn_pmd(PFN_DOWN(load_pa), prot);
> + /* Setup trampoline mapping ops */
> + tramp_ops.get_pte_virt = __load_va(early_get_pte_virt, load_pa);
> + tramp_ops.alloc_pte = __load_va(early_alloc_trampoline_pte, load_pa);
> + tramp_ops.get_pmd_virt = NULL;
> + tramp_ops.alloc_pmd = NULL;
>
> - pgdp = __load_va(swapper_pg_dir, load_pa);
> + /* Setup swapper mapping ops */
> + swap_ops.get_pte_virt = __load_va(early_get_pte_virt, load_pa);
> + swap_ops.alloc_pte = __load_va(early_alloc_swapper_pte, load_pa);
> + swap_ops.get_pmd_virt = NULL;
> + swap_ops.alloc_pmd = NULL;
>
> - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
> - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
> +#ifndef __PAGETABLE_PMD_FOLDED
> + /* Update trampoline mapping ops for PMD */
> + tramp_ops.get_pmd_virt = __load_va(early_get_pmd_virt, load_pa);
> + tramp_ops.alloc_pmd = __load_va(early_alloc_trampoline_pmd, load_pa);
>
> - map_pa = __load_pa(swapper_pmd, load_pa);
> - pgdp[o] = pfn_pgd(PFN_DOWN(map_pa) + i, tableprot);
> - }
> - pmdp = __load_va(swapper_pmd, load_pa);
> - for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++)
> - pmdp[i] = pfn_pmd(PFN_DOWN(load_pa + i * PMD_SIZE), prot);
> + /* Update swapper mapping ops for PMD */
> + swap_ops.get_pmd_virt = __load_va(early_get_pmd_virt, load_pa);
> + swap_ops.alloc_pmd = __load_va(early_alloc_swapper_pmd, load_pa);
>
> + /* Setup swapper PGD and PMD for fixmap */
> map_pa = __load_pa(fixmap_pmd, load_pa);
> - pgdp[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
> - pfn_pgd(PFN_DOWN(map_pa), tableprot);
> - pmdp = __load_va(fixmap_pmd, load_pa);
> + create_pgd_mapping(__load_va(swapper_pg_dir, load_pa),
> + FIXADDR_START, map_pa, PGDIR_SIZE, tableprot,
> + load_pa, &swap_ops);
> map_pa = __load_pa(fixmap_pte, load_pa);
> - fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] =
> - pfn_pmd(PFN_DOWN(map_pa), tableprot);
> + create_pmd_mapping(__load_va(fixmap_pmd, load_pa),
> + FIXADDR_START, map_pa, PMD_SIZE, tableprot,
> + load_pa, &swap_ops);
> #else
> - pgdp = __load_va(trampoline_pg_dir, load_pa);
> - pgdp[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
> - pfn_pgd(PFN_DOWN(load_pa), prot);
> + /* Setup swapper PGD for fixmap */
> + map_pa = __load_pa(fixmap_pte, load_pa);
> + create_pgd_mapping(__load_va(swapper_pg_dir, load_pa),
> + FIXADDR_START, map_pa, PGDIR_SIZE, tableprot,
> + load_pa, &swap_ops);
> +#endif
>
> - pgdp = __load_va(swapper_pg_dir, load_pa);
> - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
> - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
> + /* Setup trampoling PGD covering first few MBs of kernel */
> + end_va = PAGE_OFFSET + PAGE_SIZE*PTRS_PER_PTE;
> + for (va = PAGE_OFFSET; va < end_va; va += map_size)
> + create_pgd_mapping(__load_va(trampoline_pg_dir, load_pa),
> + va, load_pa + (va - PAGE_OFFSET),
> + map_size, prot, load_pa, &tramp_ops);
> +
> + /*
> + * Setup swapper PGD covering entire kernel which will allows
> + * us to reach paging_init(). We map all memory banks later in
> + * setup_vm_final() below.
> + */
> + end_va = PAGE_OFFSET + load_sz;
> + for (va = PAGE_OFFSET; va < end_va; va += map_size)
> + create_pgd_mapping(__load_va(swapper_pg_dir, load_pa),
> + va, load_pa + (va - PAGE_OFFSET),
> + map_size, prot, load_pa, &swap_ops);
> +
> + /* Create fixed mapping for early parsing of FDT */
> + end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
> + for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
> + create_pte_mapping(__load_va(fixmap_pte, load_pa),
> + va, dtb_pa + (va - __fix_to_virt(FIX_FDT)),
> + PAGE_SIZE, prot);
> +}
>
> - pgdp[o] = pfn_pgd(PFN_DOWN(load_pa + i * PGDIR_SIZE), prot);
> - }
> +static void __init setup_vm_final(void)
> +{
> + phys_addr_t pa, start, end;
> + struct memblock_region *reg;
> + struct mapping_ops ops;
> + pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC);
>
> - map_pa = __load_pa(fixmap_pte, load_pa);
> - pgdp[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
> - pfn_pgd(PFN_DOWN(map_pa), tableprot);
> + /* Setup mapping ops */
> + ops.get_pte_virt = final_get_pte_virt;
> + ops.alloc_pte = final_alloc_pte;
> +#ifndef __PAGETABLE_PMD_FOLDED
> + ops.get_pmd_virt = final_get_pmd_virt;
> + ops.alloc_pmd = final_alloc_pmd;
> +#else
> + ops.get_pmd_virt = NULL;
> + ops.alloc_pmd = NULL;
> #endif
> +
> + /* Map all memory banks */
> + for_each_memblock(memory, reg) {
> + start = reg->base;
> + end = start + reg->size;
> +
> + if (start >= end)
> + break;
> + if (memblock_is_nomap(reg))
> + continue;
> + if (start <= __pa(PAGE_OFFSET) &&
> + __pa(PAGE_OFFSET) < end)
> + start = __pa(PAGE_OFFSET);
> +
> + for (pa = start; pa < end; pa += map_size)
> + create_pgd_mapping(swapper_pg_dir,
> + (uintptr_t)__va(pa), pa,
> + map_size, prot, 0, &ops);
> + }
> +
> + clear_fixmap(FIX_PTE);
> + clear_fixmap(FIX_PMD);
> +}
> +
> +void __init paging_init(void)
> +{
> + setup_vm_final();
> + setup_zero_page();
> + local_flush_tlb_all();
> + zone_sizes_init();
> }
> --
> 2.17.1
>

--
Sincerely yours,
Mike.