Re: [RFC V2 14/14] arm64/mm: Add initial support for FEAT_D128 page tables

From: Ryan Roberts

Date: Wed May 27 2026 - 11:02:04 EST


On 13/05/2026 05:45, Anshuman Khandual wrote:
> Add build time support for FEAT_D128 page tables with a new Kconfig option,
> i.e. CONFIG_ARM64_D128. When selected, PTE types become 128 bits wide and
> PTE bits are mapped to their new locations. The basic page table geometry
> is also updated, since each table page now holds half the number of
> entries (aka PTRS_PER_PXX) that it did previously.
>
> Since FEAT_D128 exclusively supports the permission indirection style for
> page table entry permission management, a kernel compiled for FEAT_D128
> requires both FEAT_S1PIE and FEAT_D128. If these architecture features are
> not present at boot, the kernel panics just like it does when there is a
> granule size mismatch.
>
> TTBR0/1_EL1 and PAR_EL1 registers become 128 bit wide when D128 is enabled,
> thus requiring MSRR/MRRS instructions for their updates. Because PA_BITS is
> still capped at 52 bits, MRS/MSR instructions are currently sufficient for
> the register accesses that basically operate on the lower 64 bits. However,
> the entire 128 bits of these registers are still cleared during boot via MSRR.
>
> Add support for TLBIP instruction for TLB flush macros with level hint and
> address range operations. The existing TLBI based TLB flush would have
> been sufficient given PA_BITS is still capped at 52, but it would have
> lacked both level hint and range support.
>
> This enables support for all granule size, VA_BITS and PA_BITS combinations.
>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Ryan Roberts <ryan.roberts@xxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Signed-off-by: Linu Cherian <linu.cherian@xxxxxxx> (TLBIP instructions)
> Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
> ---
> Changes in RFC V2:
>
> - Updated ARM64_CONT_[PTE|PMD]_SHIFT both for 16K and 64K base pages
> - Adopted TLBIP implementation to recent TLB flush changes
> - Renamed __PRIpte as __PRIpxx per David
> - Renamed all ptdesc_ instances as pxxval_ instead
>
> arch/arm64/Kconfig | 51 ++++++++-
> arch/arm64/Makefile | 4 +
> arch/arm64/include/asm/assembler.h | 4 +-
> arch/arm64/include/asm/el2_setup.h | 9 ++
> arch/arm64/include/asm/pgtable-hwdef.h | 137 +++++++++++++++++++++++++
> arch/arm64/include/asm/pgtable-prot.h | 18 +++-
> arch/arm64/include/asm/pgtable-types.h | 9 ++
> arch/arm64/include/asm/pgtable.h | 56 +++++++++-
> arch/arm64/include/asm/smp.h | 1 +
> arch/arm64/include/asm/tlbflush.h | 68 ++++++++++--
> arch/arm64/kernel/head.S | 12 +++
> arch/arm64/mm/proc.S | 25 ++++-
> 12 files changed, 374 insertions(+), 20 deletions(-)
>

[...]

Some comments on tlbflush.h only:

> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 361d74ef8016..7831759b98e1 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -41,6 +41,25 @@
>
> #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
>
> +#ifdef CONFIG_ARM64_D128
> +#define __tlbip(op, arg) do { \
> + asm (ARM64_ASM_PREAMBLE \
> + ".arch_extension d128\n\t" \
> + "tlbip " #op ", %0, %H0\n" \
> + : : "r" (arg.full)); \
> +} while (0)
> +
> +#define __tlbip_user(op, arg) do { \
> + if (arm64_kernel_unmapped_at_el0()) { \
> + arg.low |= USER_ASID_FLAG; \
> + __tlbip(op, (arg)); \
> + } \
> +} while (0)
> +
> +#endif
> +
> +#define TLBI_ASID_MASK GENMASK_ULL(63, 48)
> +
> #define __tlbi_user(op, arg) do { \
> if (arm64_kernel_unmapped_at_el0()) \
> __tlbi(op, (arg) | USER_ASID_FLAG); \
> @@ -162,9 +181,15 @@ static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
>
> #define TLBI_TTL_UNKNOWN INT_MAX
>
> +#ifdef CONFIG_ARM64_D128
> +typedef union __u128_halves tlbi_args_t;

I wonder if you could just define this as u128? I think that would make things
a bit neater, since you should then be able to do normal bit twiddling on it.

> +#define __tlbi_wrapper(op, arg) __tlbip(op, arg)
> +#define __tlbi_user_wrapper(op, arg) __tlbip_user(op, arg)
> +#else
> typedef u64 tlbi_args_t;
> #define __tlbi_wrapper(op, arg) __tlbi(op, arg)
> #define __tlbi_user_wrapper(op, arg) __tlbi_user(op, arg)
> +#endif
>
> typedef void (*tlbi_op)(tlbi_args_t arg);
>
> @@ -211,17 +236,28 @@ static __always_inline void ipas2e1is(tlbi_args_t arg)
> __tlbi_wrapper(ipas2e1is, arg);
> }
>
> -static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level,
> - u16 asid)
> +static __always_inline void __tlbi_update_level(u32 level, u64 *arg)
> {
> - u64 arg = __TLBI_VADDR(addr, asid);
> -
> if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && level <= 3) {
> u64 ttl = level | (get_trans_granule() << 2);
>
> - FIELD_MODIFY(TLBI_TTL_MASK, &arg, ttl);
> + FIELD_MODIFY(TLBI_TTL_MASK, arg, ttl);
> }
> +}
> +
> +static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level, u16 asid)
> +{
> +#ifdef CONFIG_ARM64_D128
> + union __u128_halves arg;
> +
> + arg.low = FIELD_PREP(TLBI_ASID_MASK, asid);
> + __tlbi_update_level(level, &arg.low);
> + arg.high = addr >> 12;
> +#else
> + u64 arg = __TLBI_VADDR(addr, asid);
>
> + __tlbi_update_level(level, &arg);
> +#endif
> op(arg);
> }

It would be a neater change if you could get away with something like this. If
you typedef tlbi_args_t as u128, will FIELD_MODIFY() work? I'm not sure...


/*
 * Build the argument for a single by-address TLB invalidation of @addr
 * tagged with @asid, optionally add a TTL level hint, and pass it to @op.
 *
 * This sketch assumes tlbi_args_t is typedef'd as u128 when
 * CONFIG_ARM64_D128 is set and as u64 otherwise, so that both encodings
 * can share one local variable and one TTL update path.
 *
 * NOTE(review): whether FIELD_MODIFY() accepts a pointer to a 128-bit
 * integer has not been verified - confirm before relying on this.
 */
static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level,
					      u16 asid)
{
	tlbi_args_t arg;

#ifdef CONFIG_ARM64_D128
	arg = FIELD_PREP(TLBI_ASID_MASK, asid);
	/*
	 * Widen before shifting: (addr >> 12) is a u64, and shifting a
	 * 64-bit value left by 64 is undefined behaviour, so the address
	 * would never reach the upper half of the argument.
	 */
	arg |= (tlbi_args_t)(addr >> 12) << 64;
#else
	arg = __TLBI_VADDR(addr, asid);
#endif

	/* Only encode a level hint when the CPU supports TTL and level is 0-3. */
	if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && level <= 3) {
		u64 ttl = level | (get_trans_granule() << 2);

		FIELD_MODIFY(TLBI_TTL_MASK, &arg, ttl);
	}

	op(arg);
}


>
> @@ -507,19 +543,33 @@ static __always_inline void ripas2e1is(tlbi_args_t arg)
> __tlbi_wrapper(ripas2e1is, arg);
> }
>
> -static __always_inline void __tlbi_range(tlbi_op op, u64 addr,
> - u16 asid, int scale, int num,
> - u32 level, bool lpa2)
> +static __always_inline u64 __tlbi_range_args_encode_comm(u16 asid, int scale, int num, u32 level)
> {
> u64 arg = 0;
>
> - arg |= FIELD_PREP(TLBIR_BADDR_MASK, addr >> (lpa2 ? 16 : PAGE_SHIFT));
> arg |= FIELD_PREP(TLBIR_TTL_MASK, level > 3 ? 0 : level);
> arg |= FIELD_PREP(TLBIR_NUM_MASK, num);
> arg |= FIELD_PREP(TLBIR_SCALE_MASK, scale);
> arg |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule());
> arg |= FIELD_PREP(TLBIR_ASID_MASK, asid);
>
> + return arg;
> +}
> +
> +static __always_inline void __tlbi_range(tlbi_op op, u64 addr,
> + u16 asid, int scale, int num,
> + u32 level, bool lpa2)
> +{
> +#ifdef CONFIG_ARM64_D128
> + union __u128_halves arg;
> +
> + arg.low = __tlbi_range_args_encode_comm(asid, scale, num, level);
> + arg.high = addr >> 12;
> +#else
> + u64 arg = __tlbi_range_args_encode_comm(asid, scale, num, level);
> +
> + arg |= FIELD_PREP(TLBIR_BADDR_MASK, addr >> (lpa2 ? 16 : PAGE_SHIFT));
> +#endif
> op(arg);
> }

And you could do the same thing here, keeping a single function with only a
minimal diff.

[...]