[PATCH] arm: lpae: fix non-atomic page table entry update issue

From: Wang YanQing

Date: Sat Mar 14 2026 - 15:11:48 EST



The ARM Architecture Reference Manual explicitly dictates that writes of 64-bit
translation table descriptors must be single-copy atomic. To achieve this 64-bit
atomicity on a 32-bit architecture, the Linux kernel relies on the STRD (Store
Register Dual) instruction, but copy_pmd() in pgtable-3level.h is C code, so the
compiler may optimize it aggressively and generate code that breaks the
atomicity. For example, we get the copy_pmd() assembly code below with gcc 12.4.0
(using CC_OPTIMIZE_FOR_PERFORMANCE, which is the default compile option):
"
gdb vmlinux
gdb disassemble do_translation_fault
gdb ...
gdb 0xc020e544 <+136>: ldr.w r4, [r0, r1, lsl #3] @load low 32-bit of pmdps
gdb 0xc020e548 <+140>: ldr r0, [r6, #4] @load high 32-bit of pmdps
gdb 0xc020e54a <+142>: orrs.w r6, r4, r0 @ pmd_none(pmd_k[index])
gdb 0xc020e54e <+146>: beq.n 0xc020e586 <do_translation_fault+202>
gdb ...
gdb 0xc020e562 <+166>: str.w r4, [r5, r1, lsl #3] @store low 32-bit to pmdpd
gdb 0xc020e566 <+170>: str r0, [r2, #4] @store high 32-bit to pmdpd

The generated code breaks the atomicity, and the valid bit lives in the low 32
bits: the hardware page table walker could observe and cache a partially written
entry, which causes very strange translation-related issues when the physical
address of the next-level page table (the level-3 PTE table) is larger than 32
bits.

So let's use WRITE_ONCE() to protect the page table entry update functions from
such compiler optimizations.

Signed-off-by: Wang YanQing <udknight@xxxxxxxxx>
---
arch/arm/include/asm/pgtable-3level.h | 28 +++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 7b71a3d414b7..b077174a4231 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -120,15 +120,15 @@
PMD_TYPE_SECT)
#define pmd_leaf(pmd) pmd_sect(pmd)

-#define pud_clear(pudp) \
- do { \
- *pudp = __pud(0); \
- clean_pmd_entry(pudp); \
+#define pud_clear(pudp) \
+ do { \
+ WRITE_ONCE(*pudp, __pud(0)); \
+ clean_pmd_entry(pudp); \
} while (0)

#define set_pud(pudp, pud) \
do { \
- *pudp = pud; \
+ WRITE_ONCE(*pudp, pud); \
flush_pmd_entry(pudp); \
} while (0)

@@ -139,16 +139,16 @@ static inline pmd_t *pud_pgtable(pud_t pud)

#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))

-#define copy_pmd(pmdpd,pmdps) \
- do { \
- *pmdpd = *pmdps; \
- flush_pmd_entry(pmdpd); \
+#define copy_pmd(pmdpd, pmdps) \
+ do { \
+ WRITE_ONCE(*pmdpd, READ_ONCE(*pmdps)); \
+ flush_pmd_entry(pmdpd); \
} while (0)

-#define pmd_clear(pmdp) \
- do { \
- *pmdp = __pmd(0); \
- clean_pmd_entry(pmdp); \
+#define pmd_clear(pmdp) \
+ do { \
+ WRITE_ONCE(*pmdp, __pmd(0)); \
+ clean_pmd_entry(pmdp); \
} while (0)

/*
@@ -241,7 +241,7 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
else
pmd_val(pmd) |= PMD_SECT_AP2;

- *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
+ WRITE_ONCE(*pmdp, __pmd(pmd_val(pmd) | PMD_SECT_nG));
flush_pmd_entry(pmdp);
}

--
2.34.1