[PATCH v3 7/8] mm/x86: Add missing pud helpers

From: Peter Xu
Date: Mon Jul 15 2024 - 15:23:33 EST


These new helpers will be needed for pud entry updates soon. Introduce
these helpers by referencing the pmd ones. Namely:

- pudp_invalidate()
- pud_modify()

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
arch/x86/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++++-----
arch/x86/mm/pgtable.c | 12 ++++++++
2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index cdf044c2ad6e..701593c53f3b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -782,6 +782,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}

+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -829,14 +835,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmd_result = __pmd(val);

/*
- * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
- * 1. Marking Write=0 PMDs Dirty=1
- * 2. Marking Dirty=1 PMDs Write=0
- *
- * The first case cannot happen because the _PAGE_CHG_MASK will filter
- * out any Dirty bit passed in newprot. Handle the second case by
- * going through the mksaveddirty exercise. Only do this if the old
- * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
*/
if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result);
@@ -846,6 +846,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd_result;
}

+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
+}
+
/*
* mprotect needs to preserve PAT and encryption bits when updating
* vm_page_prot
@@ -1384,10 +1407,26 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif

+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);

+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 564b8945951e..fa77411bb266 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -641,6 +641,18 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
}
#endif

+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
+#endif
+
/**
* reserve_top_address - reserves a hole in the top of kernel address space
* @reserve - size of hole to reserve
--
2.45.0