[PATCH 3/4] s390/pgtable: Add s390 support for page table check
From: Alexander Gordeev
Date: Mon Feb 23 2026 - 06:54:04 EST
From: Tobias Huschle <huschle@xxxxxxxxxxxxx>
Add page table check hooks into routines that modify user page
tables.
Unlike other architectures s390 does not have means to distinguish
between kernel and user page table entries. Rely on the fact the
page table check infrastructure itself operates on non-init_mm
memory spaces only.
Use the provided mm_struct to verify that the memory space is not
init_mm (aka not the kernel memory space) indeed. That check is
supposed to be succeeded already (on some code paths even twice).
If the passed memory space by contrast is init_mm that would be an
unexpected semantical change in generic code, so do VM_BUG_ON() in
such case.
Unset _SEGMENT_ENTRY_READ bit to indicate that pmdp_invalidate()
was applied against a huge PMD and is going to be updated by
set_pmd_at() shortly. The hook pmd_user_accessible_page() should
skip such entries until that, otherwise the page table accounting
falls apart and BUG_ON() gets hit as result.
The invalidated huge PMD entry should not be confused with a PROT_NONE
entry as reported by pmd_protnone(), though the entry characteristics
exactly match: _SEGMENT_ENTRY_LARGE is set while _SEGMENT_ENTRY_READ is
unset. Since pmd_protnone() implementation depends on NUMA_BALANCING
configuration option, it should not be used in pmd_user_accessible_page()
check, which is expected to be CONFIG_NUMA_BALANCING-agnostic.
Nevertheless, an invalidated huge PMD is technically still pmd_protnone()
entry and it should not break other code paths once _SEGMENT_ENTRY_READ
is unset. As of now, all pmd_protnone() checks are done under page table
locks or exercise GUP-fast and HMM code paths, which are expected to be
safe against concurrent page table updates.
Alternative approach would be using the last remaining unused PMD entry
bit 0x800 to indicate that pmdp_invalidate() was called on a PMD. That
would allow avoiding collisions with pmd_protnone() handling code paths,
but saving the bit is more preferable way to go.
Reviewed-by: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx>
Signed-off-by: Tobias Huschle <huschle@xxxxxxxxxxxxx>
Co-developed-by: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
---
arch/s390/Kconfig | 1 +
arch/s390/include/asm/pgtable.h | 54 ++++++++++++++++++++++++++++++---
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index edc927d9e85a..7bda45d30455 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -154,6 +154,7 @@ config S390
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 04ec9fee6498..67f5df20a57e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -16,8 +16,10 @@
#include <linux/mm_types.h>
#include <linux/cpufeature.h>
#include <linux/page-flags.h>
+#include <linux/page_table_check.h>
#include <linux/radix-tree.h>
#include <linux/atomic.h>
+#include <linux/mmap_lock.h>
#include <asm/ctlreg.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -1190,6 +1192,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+ page_table_check_pte_clear(mm, addr, res);
return res;
}
@@ -1208,6 +1211,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
/* At this point the reference through the mapping is still present */
if (mm_is_protected(vma->vm_mm) && pte_present(res))
WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+ page_table_check_pte_clear(vma->vm_mm, addr, res);
return res;
}
@@ -1231,6 +1235,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
} else {
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
+
+ page_table_check_pte_clear(mm, addr, res);
+
/* Nothing to do */
if (!mm_is_protected(mm) || !pte_present(res))
return res;
@@ -1327,6 +1334,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
{
if (pte_present(entry))
entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
+ page_table_check_ptes_set(mm, addr, ptep, entry, nr);
for (;;) {
set_pte(ptep, entry);
if (--nr == 0)
@@ -1703,6 +1711,7 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
+ page_table_check_pmd_set(mm, addr, pmdp, entry);
set_pmd(pmdp, entry);
}
@@ -1717,7 +1726,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ pmd_t pmd;
+
+ pmd = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ page_table_check_pmd_clear(mm, addr, pmd);
+ return pmd;
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
@@ -1725,12 +1738,17 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmdp, int full)
{
+ pmd_t pmd;
+
if (full) {
- pmd_t pmd = *pmdp;
+ pmd = *pmdp;
set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
return pmd;
}
- return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ pmd = pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ page_table_check_pmd_clear(vma->vm_mm, addr, pmd);
+ return pmd;
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1748,7 +1766,12 @@ static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
VM_WARN_ON_ONCE(!pmd_present(pmd));
pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
- return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+#ifdef CONFIG_PAGE_TABLE_CHECK
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_READ));
+#endif
+ page_table_check_pmd_set(vma->vm_mm, addr, pmdp, pmd);
+ pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+ return pmd;
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1783,6 +1806,29 @@ static inline int has_transparent_hugepage(void)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(struct mm_struct *mm, unsigned long addr, pte_t pte)
+{
+ VM_BUG_ON(mm == &init_mm);
+
+ return pte_present(pte);
+}
+
+static inline bool pmd_user_accessible_page(struct mm_struct *mm, unsigned long addr, pmd_t pmd)
+{
+ VM_BUG_ON(mm == &init_mm);
+
+ return pmd_leaf(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+
+static inline bool pud_user_accessible_page(struct mm_struct *mm, unsigned long addr, pud_t pud)
+{
+ VM_BUG_ON(mm == &init_mm);
+
+ return pud_leaf(pud);
+}
+#endif
+
/*
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
--
2.51.0