[PATCH v2 3/5] KVM: s390: vsie: Fix unshadowing logic
From: Claudio Imbrenda
Date: Tue May 12 2026 - 14:17:13 EST
In some cases (e.g. under extreme memory pressure on the host),
attempting to shadow memory will result in the same memory being
unshadowed, causing a loop.
Add a PGSTE bit to distinguish between shadowed memory and shadowed DAT
tables, and fix the unshadowing logic in _gmap_ptep_xchg() to prevent
unnecessary unshadowing and to perform better checks.
Also fix the unshadowing logic in _gmap_crstep_xchg_atomic() which did
not unshadow properly when the large page would become unprotected.
Opportunistically add a check in gmap_protect_rmap() to make sure it
won't be called with level == TABLE_TYPE_PAGE_TABLE.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
Fixes: a2c17f9270cc ("KVM: s390: New gmap code")
---
arch/s390/kvm/dat.c | 1 +
arch/s390/kvm/dat.h | 3 ++-
arch/s390/kvm/gaccess.c | 1 +
arch/s390/kvm/gmap.c | 3 ++-
arch/s390/kvm/gmap.h | 22 +++++++++++++++++++---
5 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 7b8d70fe406d..4a41c0247ffa 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
+ pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 8f8278c44879..873e13ac5a27 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -145,7 +145,8 @@ union pgste {
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 5;
+ unsigned long vsie_gmem : 1; /* Contains nested guest memory */
+ unsigned long : 4;
unsigned long : 8;
};
struct {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index b07accd19618..4f8d5592c9a9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
} else {
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
+ pgste.vsie_gmem = 1;
}
pgste_set_unlock(ptep_h, pgste);
if (rc)
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 10c98c8cc1d8..8cff0cf5ce24 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -1031,7 +1031,8 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
union pte pte;
int flags, rc;
- KVM_BUG_ON(!is_shadow(sg), sg->kvm);
+ if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
+ return -EINVAL;
lockdep_assert_held(&sg->parent->children_lock);
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 96ee1395a592..e490f2995a26 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -167,6 +167,15 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
return _gmap_unmap_prefix(gmap, gfn, end, false);
}
+static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
+{
+ if (!pgste.vsie_notif)
+ return false;
+ if (pgste.vsie_gmem)
+ return (oldpte.h.p != newpte.h.p) || newpte.h.i;
+ return !newpte.h.p || !newpte.s.pr;
+}
+
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
@@ -180,8 +189,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
pgste.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + 1);
}
- if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
+ if (pte_needs_unshadow(*ptep, newpte, pgste)) {
pgste.vsie_notif = 0;
+ pgste.vsie_gmem = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
@@ -198,6 +208,13 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
+static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
+{
+ if (!oldcrste.s.fc1.vsie_notif)
+ return false;
+ return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif;
+}
+
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
union crste oldcrste, union crste newcrste,
gfn_t gfn, bool needs_lock)
@@ -216,8 +233,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
- (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
--
2.54.0