[PATCH 08/11] arm64: mm: Always call PTE/PMD ctor in __create_pgd_mapping()

From: Kevin Brodsky
Date: Mon Mar 17 2025 - 10:24:26 EST


TL;DR: always call the PTE/PMD ctor, passing the appropriate mm to
skip ptlock_init() if unneeded.

__create_pgd_mapping() is used for creating different kinds of
mappings, and may allocate page table pages if passed an allocator
callback. There are currently three such cases:

1. create_pgd_mapping(), which is used to create the EFI mapping
2. arch_add_memory()
3. map_entry_trampoline()

1. uses pgd_pgtable_alloc() as the allocator callback, which calls
the PTE/PMD ctor, while 2. and 3. use __pgd_pgtable_alloc(), which
does not. The rationale is most likely that pgtables associated with
init_mm do not make use of split page table locks, so it is
unnecessary to initialise them by calling the ctor. 2. operates on
swapper_pg_dir, so the allocated pgtables are clearly associated
with init_mm; this is arguably the case for 3. too (the trampoline
mapping is never modified, so ptlocks are irrelevant there anyway).
1. corresponds to efi_mm, so ptlocks do need to be initialised in
that case.

We are now moving towards calling the ctor for all page tables, even
those associated with init_mm. pagetable_{pte,pmd}_ctor() have
become aware of the associated mm so that the ptlock initialisation
can be skipped for init_mm. This patch therefore amends the
allocator callbacks so that the PTE/PMD ctors are always called,
with an appropriate mm pointer to avoid unnecessary ptlock overhead.
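
For reference, with the mm-aware ctors introduced earlier in the
series, the generic PTE ctor is expected to look roughly like the
sketch below. This is only an illustration based on the existing
pagetable_pte_ctor() in <linux/mm.h>; the exact shape of the init_mm
check is an assumption, not something introduced by this patch:

  static inline bool pagetable_pte_ctor(struct mm_struct *mm,
                                        struct ptdesc *ptdesc)
  {
          struct folio *folio = ptdesc_folio(ptdesc);

          /* init_mm does not use split PTE locks: skip ptlock_init() */
          if (mm != &init_mm && !ptlock_init(ptdesc))
                  return false;

          __folio_set_pgtable(folio);
          lruvec_stat_add_folio(folio, NR_PAGETABLE);
          return true;
  }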

Modifying the prototype of the allocator callbacks to take the mm
and propagating that pointer all the way down would be pretty
invasive. Instead:

* __pgd_pgtable_alloc() (cases 2. and 3. above) is replaced with
pgd_pgtable_alloc_init_mm(), resulting in the ctors being called
with &init_mm. This is the main functional change in this patch;
the ptlock still isn't initialised, but other ctor actions (e.g.
accounting-related) are now carried out for those allocated
pgtables.

* pgd_pgtable_alloc() (case 1. above) is replaced with
pgd_pgtable_alloc_special_mm(), resulting in the ctors being
called with NULL as mm. No functional change here; NULL
essentially means "not init_mm", and the ptlock is still
initialised.

__pgd_pgtable_alloc() is now the common implementation of those two
helpers. While at it, we switch it to using pagetable_alloc(), like
standard pgtable allocator functions, and remove the comment
regarding ctor calls (ctors are now always expected to be called).
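
To summarise, the allocator callback used by each call site after
this patch (as per the diff below):

  create_pgd_mapping()   -> pgd_pgtable_alloc_special_mm()  /* NULL mm  */
  map_entry_trampoline() -> pgd_pgtable_alloc_init_mm()     /* &init_mm */
  arch_add_memory()      -> pgd_pgtable_alloc_init_mm()     /* &init_mm */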

Signed-off-by: Kevin Brodsky <kevin.brodsky@xxxxxxx>
---
arch/arm64/mm/mmu.c | 41 +++++++++++++++++++++--------------------
1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a7292ce9d7b8..accb0a33c59f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -480,31 +480,22 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
int flags);
#endif

-static phys_addr_t __pgd_pgtable_alloc(enum pgtable_type pgtable_type)
+static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
+ enum pgtable_type pgtable_type)
{
/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
+ struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
+ phys_addr_t pa;

- BUG_ON(!ptr);
- return __pa(ptr);
-}
-
-static phys_addr_t pgd_pgtable_alloc(enum pgtable_type pgtable_type)
-{
- phys_addr_t pa = __pgd_pgtable_alloc(pgtable_type);
- struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa));
+ BUG_ON(!ptdesc);
+ pa = page_to_phys(ptdesc_page(ptdesc));

- /*
- * Call proper page table ctor in case later we need to
- * call core mm functions like apply_to_page_range() on
- * this pre-allocated page table.
- */
switch (pgtable_type) {
case TABLE_PTE:
- BUG_ON(!pagetable_pte_ctor(NULL, ptdesc));
+ BUG_ON(!pagetable_pte_ctor(mm, ptdesc));
break;
case TABLE_PMD:
- BUG_ON(!pagetable_pmd_ctor(NULL, ptdesc));
+ BUG_ON(!pagetable_pmd_ctor(mm, ptdesc));
break;
default:
break;
@@ -513,6 +504,16 @@ static phys_addr_t pgd_pgtable_alloc(enum pgtable_type pgtable_type)
return pa;
}

+static phys_addr_t pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
+{
+ return __pgd_pgtable_alloc(&init_mm, pgtable_type);
+}
+
+static phys_addr_t pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
+{
+ return __pgd_pgtable_alloc(NULL, pgtable_type);
+}
+
/*
* This function can only be used to modify existing table entries,
* without allocating new levels of table. Note that this permits the
@@ -542,7 +543,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc, flags);
+ pgd_pgtable_alloc_special_mm, flags);
}

static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
@@ -756,7 +757,7 @@ static int __init map_entry_trampoline(void)
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
entry_tramp_text_size(), prot,
- __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS);
+ pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);

/* Map both the text and data into the kernel page table */
for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
@@ -1362,7 +1363,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, params->pgprot, __pgd_pgtable_alloc,
+ size, params->pgprot, pgd_pgtable_alloc_init_mm,
flags);

memblock_clear_nomap(start, size);
--
2.47.0