[PATCH v2 01/13] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap

From: James Houghton
Date: Thu Jan 09 2025 - 15:50:54 EST


Use one of the 14 reserved u64s in struct kvm_userspace_memory_region2
for the user to provide `userfault_bitmap`.

The memslot flag indicates whether KVM should read from the memslot's
`userfault_bitmap`. The user is permitted to provide a bogus pointer, as
long as it is an untagged address that passes access_ok() (otherwise
setting the memslot fails with -EINVAL). If the bitmap cannot be read
when KVM consults it, we will return -EFAULT (with no other information)
back to the user.

Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
---
include/linux/kvm_host.h | 14 ++++++++++++++
include/uapi/linux/kvm.h | 4 +++-
virt/kvm/Kconfig | 3 +++
virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++++++
4 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 401439bb21e3..f7a3dfd5e224 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -590,6 +590,7 @@ struct kvm_memory_slot {
unsigned long *dirty_bitmap;
struct kvm_arch_memory_slot arch;
unsigned long userspace_addr;
+ unsigned long __user *userfault_bitmap;
u32 flags;
short id;
u16 as_id;
@@ -724,6 +725,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
}
#endif

+/* True iff the kernel is built with CONFIG_HAVE_KVM_USERFAULT; @kvm is unused. */
+static inline bool kvm_has_userfault(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT);
+}
+
struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -2553,4 +2559,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
struct kvm_pre_fault_memory *range);
#endif

+int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ gfn_t gfn);
+
+/* Does @memslot have KVM_MEM_USERFAULT set in its flags? */
+static inline bool kvm_memslot_userfault(struct kvm_memory_slot *memslot)
+{
+	return (memslot->flags & KVM_MEM_USERFAULT) != 0;
+}
+
#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 343de0a51797..7ade5169d373 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -40,7 +40,8 @@ struct kvm_userspace_memory_region2 {
__u64 guest_memfd_offset;
__u32 guest_memfd;
__u32 pad1;
- __u64 pad2[14];
+ __u64 userfault_bitmap;
+ __u64 pad2[13];
};

/*
@@ -51,6 +52,7 @@ struct kvm_userspace_memory_region2 {
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_GUEST_MEMFD (1UL << 2)
+#define KVM_MEM_USERFAULT (1UL << 3)

/* for KVM_IRQ_LINE */
struct kvm_irq_level {
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..9eb1fae238b1 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -124,3 +124,6 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
config HAVE_KVM_ARCH_GMEM_INVALIDATE
bool
depends on KVM_PRIVATE_MEM
+
+config HAVE_KVM_USERFAULT
+ bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae231..4bceae6a6401 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1541,6 +1541,9 @@ static int check_memory_region_flags(struct kvm *kvm,
!(mem->flags & KVM_MEM_GUEST_MEMFD))
valid_flags |= KVM_MEM_READONLY;

+ if (kvm_has_userfault(kvm))
+ valid_flags |= KVM_MEM_USERFAULT;
+
if (mem->flags & ~valid_flags)
return -EINVAL;

@@ -1974,6 +1977,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
return -EINVAL;
+ if (mem->flags & KVM_MEM_USERFAULT &&
+ ((mem->userfault_bitmap != untagged_addr(mem->userfault_bitmap)) ||
+ !access_ok((void __user *)(unsigned long)mem->userfault_bitmap,
+ DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG)
+ * sizeof(long))))
+ return -EINVAL;

slots = __kvm_memslots(kvm, as_id);

@@ -2042,6 +2051,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (r)
goto out;
}
+ if (mem->flags & KVM_MEM_USERFAULT)
+ new->userfault_bitmap =
+ (unsigned long __user *)(unsigned long)mem->userfault_bitmap;

r = kvm_set_memslot(kvm, old, new, change);
if (r)
@@ -6426,3 +6438,26 @@ void kvm_exit(void)
kvm_irqfd_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+/*
+ * Returns 1 if @gfn is set in the memslot's userfault bitmap, 0 if it is clear
+ * or KVM_MEM_USERFAULT is not set, -EFAULT if the bitmap word is unreadable.
+ */
+int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		      gfn_t gfn)
+{
+	/* Unsigned: a signed offset could yield a negative shift count below. */
+	unsigned long offset, bitmap_chunk = 0;
+
+	if (!kvm_memslot_userfault(memslot))
+		return 0;
+	if (WARN_ON_ONCE(!memslot->userfault_bitmap))
+		return 0;
+
+	offset = gfn - memslot->base_gfn;
+	if (get_user(bitmap_chunk,
+		     memslot->userfault_bitmap + offset / BITS_PER_LONG))
+		return -EFAULT;
+
+	return !!(bitmap_chunk & (1ul << (offset % BITS_PER_LONG)));
+}
--
2.47.1.613.gc27f4b7a9f-goog