[RFC PATCH 2/6] KVM: SVM: Add pinning metadata in the arch memslot

From: Nikunj A Dadhania
Date: Tue Jan 18 2022 - 06:07:01 EST


An AMD SEV guest requires its pages to be pinned in host physical memory,
because the memory encryption scheme uses the physical address of the
memory being encrypted. If guest pages are moved, the decrypted content
would be incorrect, corrupting the guest's memory.

For SEV/SEV-ES guests, the hypervisor doesn't know which pages are
encrypted or when the guest is done using those pages. The hypervisor
should therefore treat all guest pages as encrypted until the guest is
destroyed.

The KVM MMU needs to track which pages are pinned, together with their
pfns, so that they can be unpinned in the guest destroy path.

Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 ++
arch/x86/include/asm/kvm_host.h | 7 +++++
arch/x86/kvm/svm/sev.c | 49 ++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 3 ++
arch/x86/kvm/svm/svm.h | 6 ++++
arch/x86/kvm/x86.c | 11 ++++++-
6 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index a96c52a99a04..da03250f503c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -87,6 +87,8 @@ KVM_X86_OP(set_tss_addr)
KVM_X86_OP(set_identity_map_addr)
KVM_X86_OP(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP(alloc_memslot_metadata)
+KVM_X86_OP(free_memslot)
KVM_X86_OP(pin_spte)
KVM_X86_OP_NULL(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1263a16dd588..c235597f8442 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -927,6 +927,8 @@ struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned long *pinned_bitmap;
+ kvm_pfn_t *pfns;
};

/*
@@ -1417,6 +1419,11 @@ struct kvm_x86_ops {

void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
+ int (*alloc_memslot_metadata)(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new);
+ void (*free_memslot)(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
void (*pin_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
kvm_pfn_t pfn);

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6a22798eaaee..d972ab4956d4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2945,3 +2945,52 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)

ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
+
+/* Free the SEV pinning metadata (pinned bitmap and pfn array) of @slot. */
+void sev_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	struct kvm_arch_memory_slot *aslot = &slot->arch;
+
+	/* sev_alloc_memslot_metadata() allocates nothing for non-SEV guests. */
+	if (!sev_guest(kvm))
+		return;
+
+	/* kvfree(NULL) is a no-op, so neither pointer needs a NULL check. */
+	kvfree(aslot->pinned_bitmap);
+	aslot->pinned_bitmap = NULL;
+
+	/* Clear the pointer as well so no stale address stays in the slot. */
+	kvfree(aslot->pfns);
+	aslot->pfns = NULL;
+}
+
+/* Allocate (or inherit from @old) @new's SEV pinning metadata; 0 or -ENOMEM. */
+int sev_alloc_memslot_metadata(struct kvm *kvm, const struct kvm_memory_slot *old,
+			       struct kvm_memory_slot *new)
+{
+	struct kvm_arch_memory_slot *aslot = &new->arch;
+
+	if (!sev_guest(kvm))
+		return 0;
+
+	if (old && old->arch.pinned_bitmap && old->arch.pfns) {
+		WARN_ON(old->npages != new->npages);
+		aslot->pinned_bitmap = old->arch.pinned_bitmap;
+		aslot->pfns = old->arch.pfns;
+		return 0;
+	}
+
+	aslot->pfns = kvcalloc(new->npages, sizeof(*aslot->pfns), GFP_KERNEL_ACCOUNT);
+	if (!aslot->pfns)
+		return -ENOMEM;
+
+	/* The bitmap holds one bit per guest page, so size it in longs. */
+	aslot->pinned_bitmap = kvcalloc(BITS_TO_LONGS(new->npages),
+					sizeof(unsigned long), GFP_KERNEL_ACCOUNT);
+	if (!aslot->pinned_bitmap) {
+		kvfree(aslot->pfns);
+		aslot->pfns = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 46bcc706f257..3fb19974f719 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4740,6 +4740,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.complete_emulated_msr = svm_complete_emulated_msr,

.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+ .alloc_memslot_metadata = sev_alloc_memslot_metadata,
+ .free_memslot = sev_free_memslot,
};

static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9f153c59f2c8..b2f8b3b52680 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -643,4 +643,10 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);

+int sev_alloc_memslot_metadata(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new);
+void sev_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76b4803dd3bd..9e07e2ef8885 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11694,6 +11694,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
}

kvm_page_track_free_memslot(slot);
+ static_call_cond(kvm_x86_free_memslot)(kvm, slot);
}

int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
@@ -11719,6 +11720,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
}

static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+ const struct kvm_memory_slot *old,
struct kvm_memory_slot *slot)
{
unsigned long npages = slot->npages;
@@ -11771,8 +11773,15 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
if (kvm_page_track_create_memslot(kvm, slot, npages))
goto out_free;

+ if (kvm_x86_ops.alloc_memslot_metadata &&
+ static_call(kvm_x86_alloc_memslot_metadata)(kvm, old, slot))
+ goto out_free_page_track;
+
return 0;

+out_free_page_track:
+ kvm_page_track_free_memslot(slot);
+
out_free:
memslot_rmap_free(slot);

@@ -11805,7 +11814,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
enum kvm_mr_change change)
{
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
- return kvm_alloc_memslot_metadata(kvm, new);
+ return kvm_alloc_memslot_metadata(kvm, old, new);

if (change == KVM_MR_FLAGS_ONLY)
memcpy(&new->arch, &old->arch, sizeof(old->arch));
--
2.32.0