[RFC PATCH v1 54/57] arm64: Support runtime folding in idmap_kpti_install_ng_mappings

From: Ryan Roberts
Date: Mon Oct 14 2024 - 07:10:53 EST


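TODO: write the final commit message. Summary of what the diff does:

idmap_kpti_install_ng_mappings() no longer depends on the compile-time
PAGE_SIZE, PTE_ADDR_LOW and PTRS_PER_* constants. The page size and the
PTE_ADDR_LOW mask are computed at runtime (get_page_size and the new
get_page_shift macro), and the per-level entry counts are passed in from
C via a new struct install_ng_pgtable_geometry. A P4D/PUD/PMD level whose
entry count is 1 is treated as folded and the walk drops straight through
to the next level. Because more registers are needed, the boot CPU path
now saves and restores x19-x30 unconditionally.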

Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx>
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@xxxxxxx/

arch/arm64/include/asm/assembler.h | 5 ++
arch/arm64/kernel/cpufeature.c | 21 +++++-
arch/arm64/mm/proc.S | 107 ++++++++++++++++++++++-------
3 files changed, 108 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 6424fd6be1cbe..0cfa7c3efd214 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -919,6 +919,11 @@ alternative_cb_end
value_for_page_size \val, \val, SZ_4K, SZ_16K, SZ_64K
.endm

+ .macro get_page_shift, val /* val := one of ARM64_PAGE_SHIFT_{4K,16K,64K}; presumably selected by the value get_tg0 leaves in val (same pattern as get_page_mask) */
+ get_tg0 \val
+ value_for_page_size \val, \val, ARM64_PAGE_SHIFT_4K, ARM64_PAGE_SHIFT_16K, ARM64_PAGE_SHIFT_64K
+ .endm
+
.macro get_page_mask, val
get_tg0 \val
value_for_page_size \val, \val, (~(SZ_4K-1)), (~(SZ_16K-1)), (~(SZ_64K-1))
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 663cc76569a27..ee94de556d3f0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1908,11 +1908,27 @@ static phys_addr_t __init kpti_ng_pgd_alloc(int shift)
return kpti_ng_temp_alloc;
}

+struct install_ng_pgtable_geometry { /* entries-per-table for each level, handed to idmap_kpti_install_ng_mappings */
+ unsigned long ptrs_per_pte; /* read by proc.S at offset 0 */
+ unsigned long ptrs_per_pmd; /* read by proc.S at offset 8 */
+ unsigned long ptrs_per_pud; /* read by proc.S at offset 16 */
+ unsigned long ptrs_per_p4d; /* read by proc.S at offset 24 */
+ unsigned long ptrs_per_pgd; /* read by proc.S at offset 32 */
+}; /* NOTE: proc.S hard-codes the offsets above; do not reorder or resize fields without updating it */
+
static int __init __kpti_install_ng_mappings(void *__unused)
{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
+ typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long,
+ struct install_ng_pgtable_geometry *);
extern kpti_remap_fn idmap_kpti_install_ng_mappings;
kpti_remap_fn *remap_fn;
+ struct install_ng_pgtable_geometry geometry = {
+ .ptrs_per_pte = PTRS_PER_PTE,
+ .ptrs_per_pmd = PTRS_PER_PMD,
+ .ptrs_per_pud = PTRS_PER_PUD,
+ .ptrs_per_p4d = PTRS_PER_P4D,
+ .ptrs_per_pgd = PTRS_PER_PGD,
+ };

int cpu = smp_processor_id();
int levels = CONFIG_PGTABLE_LEVELS;
@@ -1957,7 +1973,8 @@ static int __init __kpti_install_ng_mappings(void *__unused)
}

cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
+ remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA,
+ &geometry);
cpu_uninstall_idmap();

if (!cpu) {
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index ab5aa84923524..11bf6ba6dac33 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -190,7 +190,7 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.pushsection ".idmap.text", "a"

.macro pte_to_phys, phys, pte
- and \phys, \pte, #PTE_ADDR_LOW
+ and \phys, \pte, pte_addr_low
#ifdef CONFIG_ARM64_PA_BITS_52
and \pte, \pte, #PTE_ADDR_HIGH
orr \phys, \phys, \pte, lsl #PTE_ADDR_HIGH_SHIFT
@@ -198,7 +198,8 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.endm

.macro kpti_mk_tbl_ng, type, num_entries
- add end_\type\()p, cur_\type\()p, #\num_entries * 8
+ lsl scratch, \num_entries, #3
+ add end_\type\()p, cur_\type\()p, scratch
.Ldo_\type:
ldr \type, [cur_\type\()p], #8 // Load the entry and advance
tbz \type, #0, .Lnext_\type // Skip invalid and
@@ -220,14 +221,18 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.macro kpti_map_pgtbl, type, level
str xzr, [temp_pte, #8 * (\level + 2)] // break before make
dsb nshst
- add pte, temp_pte, #PAGE_SIZE * (\level + 2)
+ mov scratch, #(\level + 2)
+ mul scratch, scratch, page_size
+ add pte, temp_pte, scratch
lsr pte, pte, #12
tlbi vaae1, pte
dsb nsh
isb

phys_to_pte pte, cur_\type\()p
- add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 2)
+ mov scratch, #(\level + 2)
+ mul scratch, scratch, page_size
+ add cur_\type\()p, temp_pte, scratch
orr pte, pte, pte_flags
str pte, [temp_pte, #8 * (\level + 2)]
dsb nshst
@@ -235,7 +240,8 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)

/*
* void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd,
- * unsigned long temp_pte_va)
+ * unsigned long temp_pte_va,
+ * struct install_ng_pgtable_geometry *geometry)
*
* Called exactly once from stop_machine context by each CPU found during boot.
*/
@@ -251,6 +257,8 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
temp_pgd_phys .req x2
swapper_ttb .req x3
flag_ptr .req x4
+ geometry .req x4
+ scratch .req x4
cur_pgdp .req x5
end_pgdp .req x6
pgd .req x7
@@ -264,18 +272,45 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
valid .req x17
cur_p4dp .req x19
end_p4dp .req x20
-
- mov x5, x3 // preserve temp_pte arg
- mrs swapper_ttb, ttbr1_el1
- adr_l flag_ptr, __idmap_kpti_flag
+ page_size .req x21
+ ptrs_per_pte .req x22
+ ptrs_per_pmd .req x23
+ ptrs_per_pud .req x24
+ ptrs_per_p4d .req x25
+ ptrs_per_pgd .req x26
+ pte_addr_low .req x27

cbnz cpu, __idmap_kpti_secondary

-#if CONFIG_PGTABLE_LEVELS > 4
- stp x29, x30, [sp, #-32]!
+ /* Preserve callee-saved registers: 96-byte frame, {x29, x30} frame record at sp + 16 */
+ stp x19, x20, [sp, #-96]!
+ stp x21, x22, [sp, #80]
+ stp x23, x24, [sp, #64]
+ stp x25, x26, [sp, #48]
+ stp x27, x28, [sp, #32]
+ stp x29, x30, [sp, #16]
- mov x29, sp
+ add x29, sp, #16 // FP must point at the saved {x29, x30} pair, not at the x19/x20 slot
- stp x19, x20, [sp, #16]
-#endif
+
+ /* Load pgtable geometry parameters */
+ get_page_size page_size
+ ldr ptrs_per_pte, [geometry, #0]
+ ldr ptrs_per_pmd, [geometry, #8]
+ ldr ptrs_per_pud, [geometry, #16]
+ ldr ptrs_per_p4d, [geometry, #24]
+ ldr ptrs_per_pgd, [geometry, #32]
+
+ /* Precalculate pte_addr_low mask: ((1 << (50 - shift)) - 1) << shift */
+ get_page_shift x0 // x0 = shift; NOTE(review): x0 is the cpu argument register, assumed dead on the boot CPU path - confirm
+ mov pte_addr_low, #50
+ sub pte_addr_low, pte_addr_low, x0 // 50 - shift
+ mov scratch, #1 // scratch aliases geometry (x4); the geometry loads above are already done
+ lsl pte_addr_low, scratch, pte_addr_low // 1 << (50 - shift)
+ sub pte_addr_low, pte_addr_low, #1 // low (50 - shift) bits set
+ lsl pte_addr_low, pte_addr_low, x0 // shift the field up so it covers bits [49:shift]
+
+ mov temp_pte, x3
+ mrs swapper_ttb, ttbr1_el1
+ adr_l flag_ptr, __idmap_kpti_flag

/* We're the boot CPU. Wait for the others to catch up */
sevl
@@ -290,7 +325,6 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
msr ttbr1_el1, temp_pgd_phys
isb

- mov temp_pte, x5
mov_q pte_flags, KPTI_NG_PTE_FLAGS

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -320,7 +354,7 @@ alternative_else_nop_endif
/* PGD */
adrp cur_pgdp, swapper_pg_dir
kpti_map_pgtbl pgd, -1
- kpti_mk_tbl_ng pgd, PTRS_PER_PGD
+ kpti_mk_tbl_ng pgd, ptrs_per_pgd

/* Ensure all the updated entries are visible to secondary CPUs */
dsb ishst
@@ -331,21 +365,33 @@ alternative_else_nop_endif
isb

/* Set the flag to zero to indicate that we're all done */
+ adr_l flag_ptr, __idmap_kpti_flag
str wzr, [flag_ptr]
-#if CONFIG_PGTABLE_LEVELS > 4
- ldp x19, x20, [sp, #16]
- ldp x29, x30, [sp], #32
-#endif
+
+ /* Restore callee-saved registers */
+ ldp x29, x30, [sp, #16]
+ ldp x27, x28, [sp, #32]
+ ldp x25, x26, [sp, #48]
+ ldp x23, x24, [sp, #64]
+ ldp x21, x22, [sp, #80]
+ ldp x19, x20, [sp], #96
+
ret

.Lderef_pgd:
/* P4D */
.if CONFIG_PGTABLE_LEVELS > 4
p4d .req x30
+ cmp ptrs_per_p4d, #1
+ b.eq .Lfold_p4d
pte_to_phys cur_p4dp, pgd
kpti_map_pgtbl p4d, 0
- kpti_mk_tbl_ng p4d, PTRS_PER_P4D
+ kpti_mk_tbl_ng p4d, ptrs_per_p4d
b .Lnext_pgd
+.Lfold_p4d:
+ mov p4d, pgd // fold to next level
+ mov cur_p4dp, end_p4dp // must be equal to terminate loop
+ b .Lderef_p4d
.else /* CONFIG_PGTABLE_LEVELS <= 4 */
p4d .req pgd
.set .Lnext_p4d, .Lnext_pgd
@@ -355,10 +401,16 @@ alternative_else_nop_endif
/* PUD */
.if CONFIG_PGTABLE_LEVELS > 3
pud .req x10
+ cmp ptrs_per_pud, #1
+ b.eq .Lfold_pud
pte_to_phys cur_pudp, p4d
kpti_map_pgtbl pud, 1
- kpti_mk_tbl_ng pud, PTRS_PER_PUD
+ kpti_mk_tbl_ng pud, ptrs_per_pud
b .Lnext_p4d
+.Lfold_pud:
+ mov pud, p4d // fold to next level
+ mov cur_pudp, end_pudp // must be equal to terminate loop
+ b .Lderef_pud
.else /* CONFIG_PGTABLE_LEVELS <= 3 */
pud .req pgd
.set .Lnext_pud, .Lnext_pgd
@@ -368,10 +420,16 @@ alternative_else_nop_endif
/* PMD */
.if CONFIG_PGTABLE_LEVELS > 2
pmd .req x13
+ cmp ptrs_per_pmd, #1
+ b.eq .Lfold_pmd
pte_to_phys cur_pmdp, pud
kpti_map_pgtbl pmd, 2
- kpti_mk_tbl_ng pmd, PTRS_PER_PMD
+ kpti_mk_tbl_ng pmd, ptrs_per_pmd
b .Lnext_pud
+.Lfold_pmd:
+ mov pmd, pud // fold to next level
+ mov cur_pmdp, end_pmdp // must be equal to terminate loop
+ b .Lderef_pmd
.else /* CONFIG_PGTABLE_LEVELS <= 2 */
pmd .req pgd
.set .Lnext_pmd, .Lnext_pgd
@@ -381,7 +439,7 @@ alternative_else_nop_endif
/* PTE */
pte_to_phys cur_ptep, pmd
kpti_map_pgtbl pte, 3
- kpti_mk_tbl_ng pte, PTRS_PER_PTE
+ kpti_mk_tbl_ng pte, ptrs_per_pte
b .Lnext_pmd

.unreq cpu
@@ -408,6 +466,9 @@ alternative_else_nop_endif

/* Secondary CPUs end up here */
__idmap_kpti_secondary:
+ mrs swapper_ttb, ttbr1_el1
+ adr_l flag_ptr, __idmap_kpti_flag
+
/* Uninstall swapper before surgery begins */
__idmap_cpu_set_reserved_ttbr1 x16, x17

--
2.43.0