[PATCH v3 3/3] riscv: preserve A/D and soft-dirty state across PTE updates
From: Yunhui Cui
Date: Tue Jun 09 2026 - 09:13:12 EST
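Use cmpxchg-based PTE updates in ptep_set_wrprotect(),
ptep_set_access_flags() and ptep_test_and_clear_young() so that software
permission changes do not lose concurrent A/D updates from hardware.
Preserve soft-dirty state as well, since RISC-V marks PTEs dirty and
soft-dirty together.

While here, add pudp_set_wrprotect() on top of ptep_set_wrprotect() and
make arch_has_hw_pte_young() report riscv_has_hw_pte_ad_updating() instead
of only checking for the Svadu extension.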
Signed-off-by: Yunhui Cui <cuiyunhui@xxxxxxxxxxxxx>
Reviewed-by: Qingwei Hu <qingwei.hu@xxxxxxxxxxxxx>
---
arch/riscv/include/asm/pgtable.h | 27 +++++++++----
arch/riscv/mm/pgtable.c | 68 ++++++++++++++++++++++++++------
2 files changed, 77 insertions(+), 18 deletions(-)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 5d5756bda82e3..02286b48dc471 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -678,15 +678,21 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- pte_t read_pte = READ_ONCE(*ptep);
+ pte_t old_pte;
+ pte_t pte;
/*
* ptep_set_wrprotect can be called for shadow stack ranges too.
* shadow stack memory is XWR = 010 and thus clearing _PAGE_WRITE will lead to
* encoding 000b which is wrong encoding with V = 1. This should lead to page fault
* but we dont want this wrong configuration to be set in page tables.
*/
- atomic_long_set((atomic_long_t *)ptep,
- ((pte_val(read_pte) & ~(unsigned long)_PAGE_WRITE) | _PAGE_READ));
+ /*
+ * Retry the cmpxchg until it returns the value the update was computed
+ * from, so a PTE change made between the read and the write (e.g. a
+ * hardware A/D update) is folded in rather than overwritten.
+ * NOTE(review): the removed code OR-ed in _PAGE_READ explicitly; this now
+ * relies on pte_wrprotect() doing the same - confirm against its definition.
+ */
+ pte = READ_ONCE(*ptep);
+ do {
+ old_pte = pte;
+ pte = pte_wrprotect(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte),
+ pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -742,14 +748,14 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
#define pgprot_dmacoherent pgprot_writecombine
/*
- * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By
- * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in
- * DT.
+ * Both Svade and Svadu control the hardware behavior when the PTE A/D bits
+ * need to be set. The core MM code only cares whether hardware updating of
+ * the accessed/dirty state is currently active.
*/
#define arch_has_hw_pte_young arch_has_hw_pte_young
static inline bool arch_has_hw_pte_young(void)
{
- return riscv_has_extension_unlikely(RISCV_ISA_EXT_SVADU);
+ /*
+ * NOTE(review): riscv_has_hw_pte_ad_updating() is not defined in this
+ * patch; presumably it reports the runtime Svade/Svadu choice - confirm.
+ */
+ return riscv_has_hw_pte_ad_updating();
}
/*
@@ -1040,6 +1046,13 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
+#define __HAVE_ARCH_PUDP_SET_WRPROTECT
+/*
+ * Write-protect a PUD-level entry by handing it to ptep_set_wrprotect()
+ * through a pte_t pointer cast, mirroring pmdp_set_wrprotect() above.
+ */
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ ptep_set_wrprotect(mm, address, (pte_t *)pudp);
+}
+
#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 9c4427d0b1874..98eed19ea70de 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -5,23 +5,55 @@
#include <linux/kernel.h>
#include <linux/pgtable.h>
+#define RISCV_PTE_ACCESS_FLAG_MASK	(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC | \
+					 _PAGE_ACCESSED | _PAGE_DIRTY | \
+					 _PAGE_SOFT_DIRTY)
+
+/*
+ * Build the PTE value ptep_set_access_flags() should install: R/W/X come from
+ * @entry, A/D/soft-dirty are the union of @cur and @entry (never cleared),
+ * and every bit outside RISCV_PTE_ACCESS_FLAG_MASK is kept from @cur.
+ */
+static inline unsigned long riscv_pte_access_flags(unsigned long cur,
+						   unsigned long entry)
+{
+	const unsigned long sticky = _PAGE_ACCESSED | _PAGE_DIRTY |
+				     _PAGE_SOFT_DIRTY;
+	const unsigned long perms = RISCV_PTE_ACCESS_FLAG_MASK & ~sticky;
+	const unsigned long untouched = cur & ~RISCV_PTE_ACCESS_FLAG_MASK;
+
+	return untouched | (entry & perms) | ((cur | entry) & sticky);
+}
+
int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
+ unsigned long old_pteval;
+ unsigned long new_pteval;
+ unsigned long prev_pteval;
+ bool changed;
+
+ old_pteval = pte_val(ptep_get(ptep));
+ do {
+ new_pteval = riscv_pte_access_flags(old_pteval, pte_val(entry));
+ if (new_pteval == old_pteval)
+ break;
+
+ prev_pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval,
+ new_pteval);
+ if (prev_pteval == old_pteval)
+ break;
+
+ old_pteval = prev_pteval;
+ } while (1);
+
+ changed = old_pteval != new_pteval;
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SVVPTC)) {
- if (!pte_same(ptep_get(ptep), entry)) {
- __set_pte_at(vma->vm_mm, ptep, entry);
- /* Here only not svadu is impacted */
+ if (changed)
flush_tlb_page(vma, address);
- return true;
- }
- return false;
+ return changed;
}
- if (!pte_same(ptep_get(ptep), entry))
- __set_pte_at(vma->vm_mm, ptep, entry);
/*
* update_mmu_cache will unconditionally execute, handling both
* the case that the PTE changed and the spurious fault case.
@@ -32,9 +64,23 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
bool ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- if (!pte_young(ptep_get(ptep)))
- return false;
- return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
+	unsigned long seen = pte_val(ptep_get(ptep));
+
+	/*
+	 * Clear _PAGE_ACCESSED with a compare-and-exchange so that any other
+	 * bit changed in the PTE since it was read is not overwritten; on a
+	 * mismatch, start over from the value the cmpxchg returned.
+	 */
+	while (seen & _PAGE_ACCESSED) {
+		unsigned long expected = seen;
+		unsigned long cleared = pte_val(pte_mkold(__pte(expected)));
+
+		seen = cmpxchg_relaxed(&pte_val(*ptep), expected, cleared);
+		if (seen == expected)
+			return true;
+	}
+
+	/* The accessed bit was already clear: nothing to do. */
+	return false;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
--
2.39.5