[RFC PATCH v2 04/10] kvm: Allow reading/writing gmem using kvm_{read,write}_guest

From: Patrick Roy
Date: Tue Sep 10 2024 - 12:33:34 EST


If KVM can access guest_memfd memory (or at least convert it into a
state in which KVM can access it) without causing a host-kernel panic
(currently only if the VM type is KVM_X86_SW_PROTECTED_VM), allow
`kvm_{read,write}_guest` to access gfns that are backed by gmem. If KVM
cannot access guest_memfd memory (say, because it is running a TDX VM),
prepare a KVM_EXIT_MEMORY_FAULT (if possible) and return -EFAULT.

KVM can only prepare the memory fault exit inside the
`kvm_vcpu_{read,write}_guest` variant, as it needs a vcpu reference to
assign the exit reason to.

KVM accesses to gmem are done via the direct map (as no userspace
mappings exist, and even if they existed, they wouldn't be reflected
into the memslots). If `KVM_GMEM_NO_DIRECT_MAP` is set, then temporarily
reinsert the accessed folio into the direct map. Hold the folio lock for
the entire duration of the access to prevent concurrent direct map
modifications from taking place (as these might remove the direct map
entry while kvm_{read,write}_guest is using it, which would result in a
panic).

Signed-off-by: Patrick Roy <roypat@xxxxxxxxxxxx>
---
virt/kvm/kvm_main.c | 83 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 83 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d0788d0a72cc0..13347fb03d4a9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3286,11 +3286,51 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
return 0;
}

+/*
+ * Copy @len bytes, starting @offset bytes into the page backing @gfn, from
+ * guest_memfd-backed memory into @data.
+ *
+ * The page is looked up with kvm_gmem_get_pfn() using the SHARED and LOCKED
+ * flags and released again (kvm_gmem_put_shared_pfn(), unlock, put) only
+ * after the copy, so the folio stays locked for the whole access.
+ *
+ * Returns 0 on success or a negative errno: -EFAULT if @memslot is missing or
+ * is not a guest_memfd slot, otherwise whatever kvm_gmem_get_pfn() or
+ * kvm_gmem_put_shared_pfn() report.
+ */
+static int __kvm_read_guest_private_page(struct kvm *kvm,
+					 struct kvm_memory_slot *memslot, gfn_t gfn,
+					 void *data, int offset, int len)
+{
+	struct folio *folio;
+	kvm_pfn_t pfn;
+	int r;
+
+	/*
+	 * Callers select this path with kvm_mem_is_private(), which is keyed
+	 * on the gfn, while @memslot comes straight from gfn_to_memslot() and
+	 * is never checked.  Do not hand a NULL (or non-gmem) slot to
+	 * kvm_gmem_get_pfn().
+	 * NOTE(review): -EFAULT is assumed to match what the non-private path
+	 * returns for a gfn without a usable memslot - confirm.
+	 */
+	if (!kvm_slot_can_be_private(memslot))
+		return -EFAULT;
+
+	r = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, NULL,
+			     KVM_GMEM_GET_PFN_SHARED | KVM_GMEM_GET_PFN_LOCKED);
+	if (r < 0)
+		return r;
+
+	folio = pfn_folio(pfn);
+	/*
+	 * Address the page through @pfn rather than the folio head; the two
+	 * only coincide for order-0 folios.
+	 * NOTE(review): assumes gmem may hand out pfns inside larger folios.
+	 */
+	memcpy(data, page_address(pfn_to_page(pfn)) + offset, len);
+	r = kvm_gmem_put_shared_pfn(pfn);
+	folio_unlock(folio);
+	folio_put(folio);
+	return r;
+}
+
+/*
+ * vCPU flavour of __kvm_read_guest_private_page().
+ *
+ * If the underlying read fails with -EPERM (KVM is not allowed to access this
+ * gmem), prepare a memory fault exit on @vcpu for the accessed range and
+ * return -EFAULT.  Any other result is passed through unchanged.
+ */
+static int __kvm_vcpu_read_guest_private_page(struct kvm_vcpu *vcpu,
+					      struct kvm_memory_slot *memslot, gfn_t gfn,
+					      void *data, int offset, int len)
+{
+	int r = __kvm_read_guest_private_page(vcpu->kvm, memslot, gfn, data, offset, len);
+
+	/* kvm not allowed to access gmem */
+	if (r == -EPERM) {
+		/*
+		 * The exit takes a guest physical address, not a frame number:
+		 * convert @gfn before adding the in-page byte offset.
+		 */
+		kvm_prepare_memory_fault_exit(vcpu, gfn_to_gpa(gfn) + offset, len,
+					      false, false, true);
+		return -EFAULT;
+	}
+
+	return r;
+}
+
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);

+ /* A gfn reported private by kvm_mem_is_private() is read via the gmem helper. */
+ if (kvm_mem_is_private(kvm, gfn))
+ return __kvm_read_guest_private_page(kvm, slot, gfn, data, offset, len);
return __kvm_read_guest_page(slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
@@ -3300,6 +3340,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);

+ if (kvm_mem_is_private(vcpu->kvm, gfn))
+ return __kvm_vcpu_read_guest_private_page(vcpu, slot, gfn, data, offset, len);
return __kvm_read_guest_page(slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
@@ -3390,11 +3432,50 @@ static int __kvm_write_guest_page(struct kvm *kvm,
return 0;
}

+/*
+ * Copy @len bytes from @data into guest_memfd-backed memory, starting @offset
+ * bytes into the page backing @gfn.
+ *
+ * The page is looked up with kvm_gmem_get_pfn() using the SHARED and LOCKED
+ * flags and released again (kvm_gmem_put_shared_pfn(), unlock, put) only
+ * after the copy, so the folio stays locked for the whole access.
+ *
+ * Returns 0 on success or a negative errno: -EFAULT if @memslot is missing or
+ * is not a guest_memfd slot, otherwise whatever kvm_gmem_get_pfn() or
+ * kvm_gmem_put_shared_pfn() report.
+ */
+static int __kvm_write_guest_private_page(struct kvm *kvm,
+					  struct kvm_memory_slot *memslot, gfn_t gfn,
+					  const void *data, int offset, int len)
+{
+	struct folio *folio;
+	kvm_pfn_t pfn;
+	int r;
+
+	/*
+	 * Callers select this path with kvm_mem_is_private(), which is keyed
+	 * on the gfn, while @memslot comes straight from gfn_to_memslot() and
+	 * is never checked.  Do not hand a NULL (or non-gmem) slot to
+	 * kvm_gmem_get_pfn().
+	 * NOTE(review): -EFAULT is assumed to match what the non-private path
+	 * returns for a gfn without a usable memslot - confirm.
+	 */
+	if (!kvm_slot_can_be_private(memslot))
+		return -EFAULT;
+
+	r = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, NULL,
+			     KVM_GMEM_GET_PFN_SHARED | KVM_GMEM_GET_PFN_LOCKED);
+	if (r < 0)
+		return r;
+
+	folio = pfn_folio(pfn);
+	/*
+	 * Address the page through @pfn rather than the folio head; the two
+	 * only coincide for order-0 folios.
+	 * NOTE(review): assumes gmem may hand out pfns inside larger folios.
+	 */
+	memcpy(page_address(pfn_to_page(pfn)) + offset, data, len);
+	r = kvm_gmem_put_shared_pfn(pfn);
+	folio_unlock(folio);
+	folio_put(folio);
+	return r;
+}
+
+/*
+ * vCPU flavour of __kvm_write_guest_private_page().
+ *
+ * If the underlying write fails with -EPERM (KVM is not allowed to access
+ * this gmem), prepare a memory fault exit on @vcpu for the accessed range and
+ * return -EFAULT.  Any other result is passed through unchanged.
+ */
+static int __kvm_vcpu_write_guest_private_page(struct kvm_vcpu *vcpu,
+					       struct kvm_memory_slot *memslot, gfn_t gfn,
+					       const void *data, int offset, int len)
+{
+	int r = __kvm_write_guest_private_page(vcpu->kvm, memslot, gfn, data, offset, len);
+
+	/* kvm not allowed to access gmem */
+	if (r == -EPERM) {
+		/*
+		 * The exit takes a guest physical address, not a frame number:
+		 * convert @gfn before adding the in-page byte offset.
+		 */
+		kvm_prepare_memory_fault_exit(vcpu, gfn_to_gpa(gfn) + offset, len,
+					      true, false, true);
+		return -EFAULT;
+	}
+
+	return r;
+}
+
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
const void *data, int offset, int len)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);

+ /* A gfn reported private by kvm_mem_is_private() is written via the gmem helper. */
+ if (kvm_mem_is_private(kvm, gfn))
+ return __kvm_write_guest_private_page(kvm, slot, gfn, data, offset, len);
return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -3404,6 +3485,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);

+ if (kvm_mem_is_private(vcpu->kvm, gfn))
+ return __kvm_vcpu_write_guest_private_page(vcpu, slot, gfn, data, offset, len);
return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
--
2.46.0