[PATCH v1 15/27] KVM: s390: Refactor gmap

From: Steffen Eiden

Date: Thu Apr 02 2026 - 00:44:45 EST


Refactor gmap code such that a second s390 (host) KVM implementation can
use the gmap code as well. Move relevant definitions into the shared
kvm_host.h. Move mmu code and traces from s390 to gmap.

Signed-off-by: Steffen Eiden <seiden@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_host.h | 9 ++
arch/s390/include/asm/kvm_host_s390.h | 11 +-
arch/s390/kvm/gmap/Makefile | 2 +-
arch/s390/kvm/gmap/faultin.c | 11 +-
arch/s390/kvm/gmap/gmap.c | 11 +-
arch/s390/kvm/gmap/gmap.h | 11 ++
arch/s390/kvm/gmap/mmu.c | 154 ++++++++++++++++++++++++++
arch/s390/kvm/gmap/trace-gmap.h | 59 ++++++++++
arch/s390/kvm/s390/s390.c | 116 +------------------
arch/s390/kvm/s390/s390.h | 16 +++
arch/s390/kvm/s390/trace.h | 14 ---
11 files changed, 272 insertions(+), 142 deletions(-)
create mode 100644 arch/s390/kvm/gmap/mmu.c
create mode 100644 arch/s390/kvm/gmap/trace-gmap.h

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6ff643ac0d15..1c20168a3ef5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -5,4 +5,13 @@

#include <asm/kvm_host_s390.h>

+#define PGM_PROTECTION 0x04
+#define PGM_ADDRESSING 0x05
+#define PGM_SEGMENT_TRANSLATION 0x10
+#define PGM_PAGE_TRANSLATION 0x11
+#define PGM_ASCE_TYPE 0x38
+#define PGM_REGION_FIRST_TRANS 0x39
+#define PGM_REGION_SECOND_TRANS 0x3a
+#define PGM_REGION_THIRD_TRANS 0x3b
+
#endif
diff --git a/arch/s390/include/asm/kvm_host_s390.h b/arch/s390/include/asm/kvm_host_s390.h
index 2d62a8ff8008..c528d7600bed 100644
--- a/arch/s390/include/asm/kvm_host_s390.h
+++ b/arch/s390/include/asm/kvm_host_s390.h
@@ -153,8 +153,7 @@ struct kvm_vcpu_stat {
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
-#define PGM_PROTECTION 0x04
-#define PGM_ADDRESSING 0x05
+/* 0x04 & 0x05 defined in kvm_host.h */
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
#define PGM_FIXED_POINT_OVERFLOW 0x08
@@ -165,8 +164,7 @@ struct kvm_vcpu_stat {
#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
#define PGM_HFP_SIGNIFICANCE 0x0e
#define PGM_HFP_DIVIDE 0x0f
-#define PGM_SEGMENT_TRANSLATION 0x10
-#define PGM_PAGE_TRANSLATION 0x11
+/* 0x10 & 0x11 defined in kvm_host.h */
#define PGM_TRANSLATION_SPEC 0x12
#define PGM_SPECIAL_OPERATION 0x13
#define PGM_OPERAND 0x15
@@ -196,10 +194,7 @@ struct kvm_vcpu_stat {
#define PGM_STACK_SPECIFICATION 0x32
#define PGM_STACK_TYPE 0x33
#define PGM_STACK_OPERATION 0x34
-#define PGM_ASCE_TYPE 0x38
-#define PGM_REGION_FIRST_TRANS 0x39
-#define PGM_REGION_SECOND_TRANS 0x3a
-#define PGM_REGION_THIRD_TRANS 0x3b
+/* 0x38 - 0x3b defined in kvm_host.h */
#define PGM_SECURE_STORAGE_ACCESS 0x3d
#define PGM_NON_SECURE_STORAGE_ACCESS 0x3e
#define PGM_SECURE_STORAGE_VIOLATION 0x3f
diff --git a/arch/s390/kvm/gmap/Makefile b/arch/s390/kvm/gmap/Makefile
index 21967ed88877..140914c5c14f 100644
--- a/arch/s390/kvm/gmap/Makefile
+++ b/arch/s390/kvm/gmap/Makefile
@@ -2,4 +2,4 @@

GMAP ?= ../gmap

-kvm-y += $(GMAP)/dat.o $(GMAP)/gmap.o $(GMAP)/faultin.o
+kvm-y += $(GMAP)/dat.o $(GMAP)/gmap.o $(GMAP)/faultin.o $(GMAP)/mmu.o
diff --git a/arch/s390/kvm/gmap/faultin.c b/arch/s390/kvm/gmap/faultin.c
index e37cd18200f5..26b7d4cb1e86 100644
--- a/arch/s390/kvm/gmap/faultin.c
+++ b/arch/s390/kvm/gmap/faultin.c
@@ -9,10 +9,15 @@
#include <linux/kvm_host.h>

#include "gmap.h"
-#include "trace.h"
#include "faultin.h"
-
-bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
+#ifdef KVM_S390_ARM64
+#include "arm.h"
+#else
+#include "s390.h"
+#endif
+
+#define CREATE_TRACE_POINTS
+#include "trace-gmap.h"

/*
* kvm_s390_faultin_gfn() - handle a dat fault.
diff --git a/arch/s390/kvm/gmap/gmap.c b/arch/s390/kvm/gmap/gmap.c
index 1312d7882824..8c2cc65e7a85 100644
--- a/arch/s390/kvm/gmap/gmap.c
+++ b/arch/s390/kvm/gmap/gmap.c
@@ -21,14 +21,13 @@

#include "dat.h"
#include "gmap.h"
+#ifdef KVM_S390_ARM64
+#include "arm.h"
+#else
#include "s390.h"
+#endif
#include "faultin.h"

-static inline bool kvm_s390_is_in_sie(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.sie_block->prog0c & PROG_IN_SIE;
-}
-
static int gmap_limit_to_type(gfn_t limit)
{
if (!limit)
@@ -253,6 +252,7 @@ int s390_replace_asce(struct gmap *gmap)

bool _gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end, bool hint)
{
+#ifndef KVM_S390_ARM64
struct kvm *kvm = gmap->kvm;
struct kvm_vcpu *vcpu;
gfn_t prefix_gfn;
@@ -271,6 +271,7 @@ bool _gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end, bool hint)
kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
}
}
+#endif /* ifndef KVM_S390_ARM64 */
return true;
}

diff --git a/arch/s390/kvm/gmap/gmap.h b/arch/s390/kvm/gmap/gmap.h
index e2b3bd457782..c70f0c357b5c 100644
--- a/arch/s390/kvm/gmap/gmap.h
+++ b/arch/s390/kvm/gmap/gmap.h
@@ -241,4 +241,15 @@ static inline bool gmap_is_shadow_valid(struct gmap *sg, union asce asce, int ed
return sg->guest_asce.val == asce.val && sg->edat_level == edat_level;
}

+int gmap_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
+int gmap_prepare_memory_region(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
+void gmap_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
+bool gmap_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
+
#endif /* ARCH_KVM_GMAP_GMAP_H */
diff --git a/arch/s390/kvm/gmap/mmu.c b/arch/s390/kvm/gmap/mmu.c
new file mode 100644
index 000000000000..2d81466e49cb
--- /dev/null
+++ b/arch/s390/kvm/gmap/mmu.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+#ifdef KVM_S390_ARM64
+#include "arm.h"
+#else
+#include "s390.h"
+#endif
+#include "gmap.h"
+#include "dat.h"
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int gmap_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ int r;
+ unsigned long n;
+ struct kvm_memory_slot *memslot;
+ int is_dirty;
+
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_USER_MEM_SLOTS)
+ goto out;
+
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
+ if (r)
+ goto out;
+
+ /* Clear the dirty log */
+ if (is_dirty) {
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+int gmap_prepare_memory_region(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ gpa_t size;
+
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
+ return -EINVAL;
+
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+ /*
+ * A few sanity checks. We can have memory slots which have to be
+ * located/ended at a segment boundary (1MB). The memory in userland is
+ * ok to be fragmented into various different vmas. It is okay to mmap()
+ * and munmap() stuff in this slot after doing this call at any time
+ */
+
+ if (new->userspace_addr & 0xffffful)
+ return -EINVAL;
+
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
+ return -EINVAL;
+
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+ return -EINVAL;
+ }
+
+ if (!kvm_s390_is_migration_mode(kvm))
+ return 0;
+
+ /*
+ * Turn off migration mode when:
+ * - userspace creates a new memslot with dirty logging off,
+ * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+ * dirty logging is turned off.
+ * Migration mode expects dirty page logging being enabled to store
+ * its dirty bitmap.
+ */
+ if (change != KVM_MR_DELETE &&
+ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ WARN(kvm_s390_vm_stop_migration(kvm),
+ "Failed to stop migration mode");
+
+ return 0;
+}
+
+void gmap_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ struct kvm_s390_mmu_cache *mc = NULL;
+ int rc = 0;
+
+ if (change == KVM_MR_FLAGS_ONLY)
+ return;
+
+ mc = kvm_s390_new_mmu_cache();
+ if (!mc) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ scoped_guard(write_lock, &kvm->mmu_lock) {
+ switch (change) {
+ case KVM_MR_DELETE:
+ rc = dat_delete_slot(mc, kvm->arch.gmap->asce, old->base_gfn, old->npages);
+ break;
+ case KVM_MR_MOVE:
+ rc = dat_delete_slot(mc, kvm->arch.gmap->asce, old->base_gfn, old->npages);
+ if (rc)
+ break;
+ fallthrough;
+ case KVM_MR_CREATE:
+ rc = dat_create_slot(mc, kvm->arch.gmap->asce, new->base_gfn, new->npages);
+ break;
+ case KVM_MR_FLAGS_ONLY:
+ break;
+ default:
+ WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
+ }
+ }
+out:
+ if (rc)
+ pr_warn("failed to commit memory region\n");
+ kvm_s390_free_mmu_cache(mc);
+}
+
+/**
+ * gmap_test_age_gfn() - test young
+ * @kvm: the kvm instance
+ * @range: the range of guest addresses whose young status needs to be cleared
+ *
+ * Context: called by KVM common code without holding the kvm mmu lock
+ * Return: true if any page in the given range is young, otherwise false.
+ */
+bool gmap_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ scoped_guard(read_lock, &kvm->mmu_lock)
+ return dat_test_age_gfn(kvm->arch.gmap->asce, range->start, range->end);
+}
diff --git a/arch/s390/kvm/gmap/trace-gmap.h b/arch/s390/kvm/gmap/trace-gmap.h
new file mode 100644
index 000000000000..25f8a527fdde
--- /dev/null
+++ b/arch/s390/kvm/gmap/trace-gmap.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(GMAP_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define GMAP_TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH ../gmap
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-gmap
+
+#ifdef KVM_S390_ARM64
+#define __KVM_FIELDS \
+ __field(unsigned long, pstate) \
+ __field(unsigned long, pc)
+#define __KVM_ASSIGN ({\
+ __entry->pstate = vcpu->arch.sae_block.pstate; \
+ __entry->pc = vcpu->arch.sae_block.pc; \
+ })
+#define __KVM_PRINT \
+ __entry->pstate, \
+ __entry->pc
+#else
+#define __KVM_FIELDS \
+ __field(unsigned long, pswmask) \
+ __field(unsigned long, pswaddr)
+#define __KVM_ASSIGN ({\
+ __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \
+ __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \
+ })
+#define __KVM_PRINT \
+ __entry->pswmask,\
+ __entry->pswaddr
+#endif
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __KVM_FIELDS
+ ),
+
+ TP_fast_assign(
+ __entry->id = vcpu->vcpu_id;
+ __KVM_ASSIGN
+ ),
+ TP_printk("%02d[%016lx-%016lx]: major fault, maybe applicable for pfault",
+ __entry->id,
+ __KVM_PRINT
+ )
+ );
+
+#endif /* GMAP_TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/s390/s390.c b/arch/s390/kvm/s390/s390.c
index 6b5b366fb073..497abe3a83f4 100644
--- a/arch/s390/kvm/s390/s390.c
+++ b/arch/s390/kvm/s390/s390.c
@@ -734,33 +734,7 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- int r;
- unsigned long n;
- struct kvm_memory_slot *memslot;
- int is_dirty;
-
- if (kvm_is_ucontrol(kvm))
- return -EINVAL;
-
- mutex_lock(&kvm->slots_lock);
-
- r = -EINVAL;
- if (log->slot >= KVM_USER_MEM_SLOTS)
- goto out;
-
- r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
- if (r)
- goto out;
-
- /* Clear the dirty log */
- if (is_dirty) {
- n = kvm_dirty_bitmap_bytes(memslot);
- memset(memslot->dirty_bitmap, 0, n);
- }
- r = 0;
-out:
- mutex_unlock(&kvm->slots_lock);
- return r;
+ return gmap_get_dirty_log(kvm, log);
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
@@ -1195,7 +1169,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
* Must be called with kvm->slots_lock to avoid races with ourselves and
* kvm_s390_vm_start_migration.
*/
-static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
/* migration mode already disabled */
if (!kvm->arch.migration_mode)
@@ -5636,51 +5610,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- gpa_t size;
-
- if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
- return -EINVAL;
-
- /* When we are protected, we should not change the memory slots */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
-
- if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
- /*
- * A few sanity checks. We can have memory slots which have to be
- * located/ended at a segment boundary (1MB). The memory in userland is
- * ok to be fragmented into various different vmas. It is okay to mmap()
- * and munmap() stuff in this slot after doing this call at any time
- */
-
- if (new->userspace_addr & 0xffffful)
- return -EINVAL;
-
- size = new->npages * PAGE_SIZE;
- if (size & 0xffffful)
- return -EINVAL;
-
- if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
- return -EINVAL;
- }
-
- if (!kvm->arch.migration_mode)
- return 0;
-
- /*
- * Turn off migration mode when:
- * - userspace creates a new memslot with dirty logging off,
- * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
- * dirty logging is turned off.
- * Migration mode expects dirty page logging being enabled to store
- * its dirty bitmap.
- */
- if (change != KVM_MR_DELETE &&
- !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
- WARN(kvm_s390_vm_stop_migration(kvm),
- "Failed to stop migration mode");
-
- return 0;
+ return gmap_prepare_memory_region(kvm, old, new, change);
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -5688,42 +5618,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- struct kvm_s390_mmu_cache *mc = NULL;
- int rc = 0;
-
- if (change == KVM_MR_FLAGS_ONLY)
- return;
-
- mc = kvm_s390_new_mmu_cache();
- if (!mc) {
- rc = -ENOMEM;
- goto out;
- }
-
- scoped_guard(write_lock, &kvm->mmu_lock) {
- switch (change) {
- case KVM_MR_DELETE:
- rc = dat_delete_slot(mc, kvm->arch.gmap->asce, old->base_gfn, old->npages);
- break;
- case KVM_MR_MOVE:
- rc = dat_delete_slot(mc, kvm->arch.gmap->asce, old->base_gfn, old->npages);
- if (rc)
- break;
- fallthrough;
- case KVM_MR_CREATE:
- rc = dat_create_slot(mc, kvm->arch.gmap->asce, new->base_gfn, new->npages);
- break;
- case KVM_MR_FLAGS_ONLY:
- break;
- default:
- WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
- }
- }
-out:
- if (rc)
- pr_warn("failed to commit memory region\n");
- kvm_s390_free_mmu_cache(mc);
- return;
+ gmap_commit_memory_region(kvm, old, new, change);
}

/**
@@ -5736,8 +5631,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
*/
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- scoped_guard(read_lock, &kvm->mmu_lock)
- return dat_test_age_gfn(kvm->arch.gmap->asce, range->start, range->end);
+ return gmap_test_age_gfn(kvm, range);
}

/**
diff --git a/arch/s390/kvm/s390/s390.h b/arch/s390/kvm/s390/s390.h
index ad1aeec40cae..3acb01690bf6 100644
--- a/arch/s390/kvm/s390/s390.h
+++ b/arch/s390/kvm/s390/s390.h
@@ -32,6 +32,11 @@ union kvm_s390_quad {
unsigned char one;
};

+static inline bool kvm_s390_is_in_sie(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.sie_block->prog0c & PROG_IN_SIE;
+}
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -588,6 +593,11 @@ static inline bool kvm_s390_cur_gmap_fault_is_write(void)
return test_facility(75) && (current->thread.gmap_teid.fsi == TEID_FSI_STORE);
}

+static __always_inline int kvm_s390_is_migration_mode(struct kvm *kvm)
+{
+ return kvm->arch.migration_mode;
+}
+
/**
* kvm_s390_vcpu_crypto_reset_all
*
@@ -618,4 +628,10 @@ void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
*/
extern unsigned int diag9c_forwarding_hz;

+/*
+ * Must be called with kvm->slots_lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+int kvm_s390_vm_stop_migration(struct kvm *kvm);
+
#endif
diff --git a/arch/s390/kvm/s390/trace.h b/arch/s390/kvm/s390/trace.h
index aa419eb6a0c8..97774fe09a85 100644
--- a/arch/s390/kvm/s390/trace.h
+++ b/arch/s390/kvm/s390/trace.h
@@ -45,20 +45,6 @@ TRACE_EVENT(kvm_s390_skey_related_inst,
VCPU_TP_PRINTK("%s", "storage key related instruction")
);

-TRACE_EVENT(kvm_s390_major_guest_pfault,
- TP_PROTO(VCPU_PROTO_COMMON),
- TP_ARGS(VCPU_ARGS_COMMON),
-
- TP_STRUCT__entry(
- VCPU_FIELD_COMMON
- ),
-
- TP_fast_assign(
- VCPU_ASSIGN_COMMON
- ),
- VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
- );
-
TRACE_EVENT(kvm_s390_pfault_init,
TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
--
2.51.0