[PATCH v1 5/7] KVM: s390: Avoid potentially sleeping while atomic when zapping pages
From: Claudio Imbrenda
Date: Thu May 28 2026 - 07:52:52 EST
Factor out try_get_locked_pte(), which behaves similarly to
get_locked_pte(), but does not attempt to allocate missing tables and
performs a spin_trylock() instead of blocking.
The new function is also exported, since it will be used in other
patches.
If intermediate entries are missing, there can be no pte swap entry to
free, so it's safe to ignore them.
This avoids potentially sleeping while atomic.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
Fixes: e38c884df921 ("KVM: s390: Switch to new gmap")
---
arch/s390/include/asm/gmap_helpers.h | 1 +
arch/s390/mm/gmap_helpers.c | 111 ++++++++++++++++-----------
2 files changed, 68 insertions(+), 44 deletions(-)
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
index 2d3ae421077e..d2b616604a46 100644
--- a/arch/s390/include/asm/gmap_helpers.h
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -12,5 +12,6 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
int gmap_helper_disable_cow_sharing(void);
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr);
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl);
#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index f8789ffcc05c..7ba15f307bb0 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -34,6 +34,66 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
swap_put_entries_direct(entry, 1);
}
+/**
+ * try_get_locked_pte() - like get_locked_pte(), but atomic and with trylock
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address whose pte is to be found
+ * @ptl: will be set to the pointer to the lock used to lock the pte in case
+ * of success.
+ *
+ * This function returns the pointer to the pte corresponding to @addr in @mm,
+ * similarly to get_locked_pte(). Unlike get_locked_pte(), no attempt is made
+ * to allocate missing page tables. If a missing or large entry is found, the
+ * function will return NULL. If the ptl lock is contended, NULL is returned.
+ *
+ * In case of success, *@ptl will point to the locked pte lock for the returned
+ * pte, like get_locked_pte() does.
+ *
+ * Context: mmap_lock or vma lock for read or for write needs to be held.
+ * Return: the pointer to the pte corresponding to @addr in @mm, if possible,
+ * otherwise NULL.
+ */
+pte_t *try_get_locked_pte(struct mm_struct *mm, unsigned long vmaddr, spinlock_t **ptl)
+{
+ pmd_t *pmdp, pmd, pmdval;
+ pud_t *pudp, pud;
+ p4d_t *p4dp, p4d;
+ pgd_t *pgdp, pgd;
+ pte_t *ptep;
+
+ pgdp = pgd_offset(mm, vmaddr);
+ pgd = pgdp_get(pgdp);
+ if (pgd_none(pgd) || !pgd_present(pgd))
+ return NULL;
+ p4dp = p4d_offset(pgdp, vmaddr);
+ p4d = p4dp_get(p4dp);
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ return NULL;
+ pudp = pud_offset(p4dp, vmaddr);
+ pud = pudp_get(pudp);
+ if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
+ return NULL;
+ pmdp = pmd_offset(pudp, vmaddr);
+ pmd = pmdp_get_lockless(pmdp);
+ if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
+ return NULL;
+ ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, ptl);
+ if (!ptep)
+ return NULL;
+
+ if (spin_trylock(*ptl)) {
+ if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmdp)))) {
+ pte_unmap_unlock(ptep, *ptl);
+ return NULL;
+ }
+ return ptep;
+ }
+
+ pte_unmap(ptep);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(try_get_locked_pte);
+
/**
* gmap_helper_zap_one_page() - discard a page if it was swapped.
* @mm: the mm
@@ -46,7 +106,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
struct vm_area_struct *vma;
- spinlock_t *ptl;
+ spinlock_t *ptl; /* Lock for the host (userspace) page table */
pte_t *ptep;
mmap_assert_locked(mm);
@@ -57,7 +117,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
return;
/* Get pointer to the page table entry */
- ptep = get_locked_pte(mm, vmaddr, &ptl);
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
if (unlikely(!ptep))
return;
if (pte_swap(*ptep)) {
@@ -113,37 +173,9 @@ EXPORT_SYMBOL_GPL(gmap_helper_discard);
*/
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
{
- pmd_t *pmdp, pmd, pmdval;
- pud_t *pudp, pud;
- p4d_t *p4dp, p4d;
- pgd_t *pgdp, pgd;
spinlock_t *ptl; /* Lock for the host (userspace) page table */
pte_t *ptep;
- pgdp = pgd_offset(mm, vmaddr);
- pgd = pgdp_get(pgdp);
- if (pgd_none(pgd) || !pgd_present(pgd))
- return;
-
- p4dp = p4d_offset(pgdp, vmaddr);
- p4d = p4dp_get(p4dp);
- if (p4d_none(p4d) || !p4d_present(p4d))
- return;
-
- pudp = pud_offset(p4dp, vmaddr);
- pud = pudp_get(pudp);
- if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
- return;
-
- pmdp = pmd_offset(pudp, vmaddr);
- pmd = pmdp_get_lockless(pmdp);
- if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
- return;
-
- ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, &ptl);
- if (!ptep)
- return;
-
/*
* Several paths exists that takes the ptl lock and then call the
* mmu_notifier, which takes the mmu_lock. The unmap path, instead,
@@ -156,21 +188,12 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
* If the lock is contended the bit is not set and the deadlock is
* avoided.
*/
- if (spin_trylock(ptl)) {
- /*
- * Make sure the pte we are touching is still the correct
- * one. In theory this check should not be needed, but
- * better safe than sorry.
- * Disabling interrupts or holding the mmap lock is enough to
- * guarantee that no concurrent updates to the page tables
- * are possible.
- */
- if (likely(pmd_same(pmdval, pmdp_get_lockless(pmdp))))
- __atomic64_or(_PAGE_UNUSED, (long *)ptep);
- spin_unlock(ptl);
- }
+ ptep = try_get_locked_pte(mm, vmaddr, &ptl);
+ if (!ptep)
+ return;
- pte_unmap(ptep);
+ __atomic64_or(_PAGE_UNUSED, (long *)ptep);
+ pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);
--
2.54.0