[RFC PATCH 05/35] KVM: SVM: Add initial support for SEV-ES GHCB access to KVM

From: Tom Lendacky
Date: Mon Sep 14 2020 - 16:34:41 EST


From: Tom Lendacky <thomas.lendacky@xxxxxxx>

Provide initial support for accessing the GHCB when the registers of an
SEV-ES guest need to be read or written. The support consists of:

- Accessing the GHCB instead of the VMSA when reading and writing
  guest registers (after the VMSA has been encrypted).
- Creating register access override functions for reading and writing
  guest registers from the common KVM support.
- Allocating pages for the VMSA and GHCB when creating each vCPU:
  - The VMSA page holds the encrypted VMSA for the vCPU.
  - The GHCB page is used to hold a copy of the guest GHCB during
    VMGEXIT processing.
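
As an illustration (not part of the diff below), the generated accessor
for RAX expands to roughly the following; the vendor override runs
before the cached register value is consumed:

    static __always_inline unsigned long kvm_rax_read(struct kvm_vcpu *vcpu)
    {
        /* Let the vendor module sync the register from the GHCB/VMSA */
        if (kvm_x86_ops.reg_read_override)
            kvm_x86_ops.reg_read_override(vcpu, VCPU_REGS_RAX);

        return vcpu->arch.regs[VCPU_REGS_RAX];
    }

For an SEV-ES guest, svm_reg_read_override() refreshes the cached value
from the save area selected by get_vmsa(), zeroing GHCB entries the
guest did not mark valid.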

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 7 ++
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 30 +++++--
arch/x86/kvm/svm/svm.c | 138 ++++++++++++++++++++++++++++++-
arch/x86/kvm/svm/svm.h | 65 ++++++++++++++-
5 files changed, 230 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5303dbc5c9bc..c900992701d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -788,6 +788,9 @@ struct kvm_vcpu_arch {

/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+ /* SEV-ES support */
+ bool vmsa_encrypted;
};

struct kvm_lpage_info {
@@ -1227,6 +1230,10 @@ struct kvm_x86_ops {
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);

void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+ void (*reg_read_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+ void (*reg_write_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+ unsigned long val);
};

struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 249a4147c4b2..16f5b20bb099 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -466,6 +466,7 @@
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cfe83d4ae625..e87eb90999d5 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,15 +9,21 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)

-#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
-static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
-{ \
- return vcpu->arch.regs[VCPU_REGS_##uname]; \
-} \
-static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
- unsigned long val) \
-{ \
- vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu) \
+{ \
+ if (kvm_x86_ops.reg_read_override) \
+ kvm_x86_ops.reg_read_override(vcpu, VCPU_REGS_##uname); \
+ \
+ return vcpu->arch.regs[VCPU_REGS_##uname]; \
+} \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
+ unsigned long val) \
+{ \
+ if (kvm_x86_ops.reg_write_override) \
+ kvm_x86_ops.reg_write_override(vcpu, VCPU_REGS_##uname, val); \
+ \
+ vcpu->arch.regs[VCPU_REGS_##uname] = val; \
}
BUILD_KVM_GPR_ACCESSORS(rax, RAX)
BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -67,6 +73,9 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
return 0;

+ if (kvm_x86_ops.reg_read_override)
+ kvm_x86_ops.reg_read_override(vcpu, reg);
+
if (!kvm_register_is_available(vcpu, reg))
kvm_x86_ops.cache_reg(vcpu, reg);

@@ -79,6 +88,9 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg,
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
return;

+ if (kvm_x86_ops.reg_write_override)
+ kvm_x86_ops.reg_write_override(vcpu, reg, val);
+
vcpu->arch.regs[reg] = val;
kvm_register_mark_dirty(vcpu, reg);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 779c167e42cc..d1f52211627a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1175,6 +1175,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
struct page *msrpm_pages;
struct page *hsave_page;
struct page *nested_msrpm_pages;
+ struct page *vmsa_page = NULL;
int err;

BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1197,9 +1198,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (!hsave_page)
goto free_page3;

+ if (sev_es_guest(svm->vcpu.kvm)) {
+ /*
+ * SEV-ES guests require a separate VMSA page to hold the
+ * encrypted register state of the guest.
+ */
+ vmsa_page = alloc_page(GFP_KERNEL);
+ if (!vmsa_page)
+ goto free_page4;
+ }
+
err = avic_init_vcpu(svm);
if (err)
- goto free_page4;
+ goto free_page5;

/* We initialize this flag to true to make sure that the is_running
* bit would be set the first time the vcpu is loaded.
@@ -1219,6 +1230,12 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+
+ if (vmsa_page) {
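+ /* Becomes the encrypted VMSA once LAUNCH_UPDATE_VMSA has run */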
+ svm->vmsa = page_address(vmsa_page);
+ clear_page(svm->vmsa);
+ }
+
svm->asid_generation = 0;
init_vmcb(svm);

@@ -1227,6 +1244,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)

return 0;

+free_page5:
+ if (vmsa_page)
+ __free_page(vmsa_page);
free_page4:
__free_page(hsave_page);
free_page3:
@@ -1258,6 +1278,26 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);

+ if (sev_es_guest(vcpu->kvm)) {
+ struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
+ if (vcpu->arch.vmsa_encrypted) {
+ u64 page_to_flush;
+
+ /*
+ * The VMSA page was used by hardware to hold guest
+ * encrypted state; be sure to flush it before returning
+ * it to the system. This is done using the VM Page
+ * Flush MSR (which takes the page virtual address and
+ * guest ASID).
+ */
+ page_to_flush = (u64)svm->vmsa | sev->asid;
+ wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, page_to_flush);
+ }
+
+ __free_page(virt_to_page(svm->vmsa));
+ }
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
@@ -4012,6 +4052,99 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
(svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
}

+/*
+ * These return values represent the offset in quad words within the VM save
+ * area. This allows them to be accessed by casting the save area to a u64
+ * array.
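+ * For example, a field at byte offset 0x10 in the save area
+ * maps to index 2 of the u64 array.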
+ */
+#define VMSA_REG_ENTRY(_field) (offsetof(struct vmcb_save_area, _field) / sizeof(u64))
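+/* valid_bitmap is not a GPR, so its entry serves as an "undefined" sentinel */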
+#define VMSA_REG_UNDEF VMSA_REG_ENTRY(valid_bitmap)
+static inline unsigned int vcpu_to_vmsa_entry(enum kvm_reg reg)
+{
+ switch (reg) {
+ case VCPU_REGS_RAX: return VMSA_REG_ENTRY(rax);
+ case VCPU_REGS_RBX: return VMSA_REG_ENTRY(rbx);
+ case VCPU_REGS_RCX: return VMSA_REG_ENTRY(rcx);
+ case VCPU_REGS_RDX: return VMSA_REG_ENTRY(rdx);
+ case VCPU_REGS_RSP: return VMSA_REG_ENTRY(rsp);
+ case VCPU_REGS_RBP: return VMSA_REG_ENTRY(rbp);
+ case VCPU_REGS_RSI: return VMSA_REG_ENTRY(rsi);
+ case VCPU_REGS_RDI: return VMSA_REG_ENTRY(rdi);
+#ifdef CONFIG_X86_64
+ case VCPU_REGS_R8: return VMSA_REG_ENTRY(r8);
+ case VCPU_REGS_R9: return VMSA_REG_ENTRY(r9);
+ case VCPU_REGS_R10: return VMSA_REG_ENTRY(r10);
+ case VCPU_REGS_R11: return VMSA_REG_ENTRY(r11);
+ case VCPU_REGS_R12: return VMSA_REG_ENTRY(r12);
+ case VCPU_REGS_R13: return VMSA_REG_ENTRY(r13);
+ case VCPU_REGS_R14: return VMSA_REG_ENTRY(r14);
+ case VCPU_REGS_R15: return VMSA_REG_ENTRY(r15);
+#endif
+ case VCPU_REGS_RIP: return VMSA_REG_ENTRY(rip);
+ default:
+ WARN_ONCE(1, "unsupported VCPU to VMSA register conversion\n");
+ return VMSA_REG_UNDEF;
+ }
+}
+
+/* For SEV-ES guests, populate the vCPU register from the appropriate VMSA/GHCB */
+static void svm_reg_read_override(struct kvm_vcpu *vcpu, enum kvm_reg reg)
+{
+ struct vmcb_save_area *vmsa;
+ struct vcpu_svm *svm;
+ unsigned int entry;
+ unsigned long val;
+ u64 *vmsa_reg;
+
+ if (!sev_es_guest(vcpu->kvm))
+ return;
+
+ entry = vcpu_to_vmsa_entry(reg);
+ if (entry == VMSA_REG_UNDEF)
+ return;
+
+ svm = to_svm(vcpu);
+ vmsa = get_vmsa(svm);
+ vmsa_reg = (u64 *)vmsa;
+ val = (unsigned long)vmsa_reg[entry];
+
+ /* If a GHCB is mapped, check the bitmap of valid entries */
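+ /* Entries the guest has not marked valid read back as zero. */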
+ if (svm->ghcb) {
+ if (!test_bit(entry, (unsigned long *)vmsa->valid_bitmap))
+ val = 0;
+ }
+
+ vcpu->arch.regs[reg] = val;
+}
+
+/* For SEV-ES guests, set the vCPU register in the appropriate VMSA */
+static void svm_reg_write_override(struct kvm_vcpu *vcpu, enum kvm_reg reg,
+ unsigned long val)
+{
+ struct vmcb_save_area *vmsa;
+ struct vcpu_svm *svm;
+ unsigned int entry;
+ u64 *vmsa_reg;
+
+ entry = vcpu_to_vmsa_entry(reg);
+ if (entry == VMSA_REG_UNDEF)
+ return;
+
+ svm = to_svm(vcpu);
+ vmsa = get_vmsa(svm);
+ vmsa_reg = (u64 *)vmsa;
+
+ /* If a GHCB is mapped, set the bit to indicate a valid entry */
+ if (svm->ghcb) {
+ unsigned int index = entry / 8;
+ unsigned int shift = entry % 8;
+
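+ /* valid_bitmap is a byte array: one valid bit per quad-word entry */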
+ vmsa->valid_bitmap[index] |= BIT(shift);
+ }
+
+ vmsa_reg[entry] = val;
+}
+
static void svm_vm_destroy(struct kvm *kvm)
{
avic_vm_destroy(kvm);
@@ -4150,6 +4283,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,

.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+ .reg_read_override = svm_reg_read_override,
+ .reg_write_override = svm_reg_write_override,
};

static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f42ba9d158df..ff587536f571 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -159,6 +159,10 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
+
+ /* SEV-ES support */
+ struct vmcb_save_area *vmsa;
+ struct ghcb *ghcb;
};

struct svm_cpu_data {
@@ -509,9 +513,34 @@ void sev_hardware_teardown(void);

static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
{
- return &svm->vmcb->save;
+ struct vmcb_save_area *vmsa;
+
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ /*
+ * Before LAUNCH_UPDATE_VMSA, use the actual SEV-ES save area
+ * to construct the initial state. Afterwards, use the mapped
+ * GHCB in a VMGEXIT or the traditional save area as a scratch
+ * area when outside of a VMGEXIT.
+ */
+ if (svm->vcpu.arch.vmsa_encrypted) {
+ if (svm->ghcb)
+ vmsa = &svm->ghcb->save;
+ else
+ vmsa = &svm->vmcb->save;
+ } else {
+ vmsa = svm->vmsa;
+ }
+ } else {
+ vmsa = &svm->vmcb->save;
+ }
+
+ return vmsa;
}

+#define SEV_ES_SET_VALID(_vmsa, _field) \
+ __set_bit(GHCB_BITMAP_IDX(_field), \
+ (unsigned long *)(_vmsa)->valid_bitmap)
+
#define DEFINE_VMSA_SEGMENT_ENTRY(_field, _entry, _size) \
static inline _size \
svm_##_field##_read_##_entry(struct vcpu_svm *svm) \
@@ -528,6 +557,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
struct vmcb_save_area *vmsa = get_vmsa(svm); \
\
vmsa->_field._entry = value; \
+ if (svm->vcpu.arch.vmsa_encrypted) { \
+ SEV_ES_SET_VALID(vmsa, _field); \
+ } \
} \

#define DEFINE_VMSA_SEGMENT_ACCESSOR(_field) \
@@ -551,6 +583,9 @@ static inline struct vmcb_save_area *get_vmsa(struct vcpu_svm *svm)
struct vmcb_save_area *vmsa = get_vmsa(svm); \
\
vmsa->_field = *seg; \
+ if (svm->vcpu.arch.vmsa_encrypted) { \
+ SEV_ES_SET_VALID(vmsa, _field); \
+ } \
}

DEFINE_VMSA_SEGMENT_ACCESSOR(cs)
@@ -579,6 +614,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
struct vmcb_save_area *vmsa = get_vmsa(svm); \
\
vmsa->_field = value; \
+ if (svm->vcpu.arch.vmsa_encrypted) { \
+ SEV_ES_SET_VALID(vmsa, _field); \
+ } \
} \
\
static inline void \
@@ -587,6 +625,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
struct vmcb_save_area *vmsa = get_vmsa(svm); \
\
vmsa->_field &= value; \
+ if (svm->vcpu.arch.vmsa_encrypted) { \
+ SEV_ES_SET_VALID(vmsa, _field); \
+ } \
} \
\
static inline void \
@@ -595,6 +636,9 @@ DEFINE_VMSA_SEGMENT_ACCESSOR(tr)
struct vmcb_save_area *vmsa = get_vmsa(svm); \
\
vmsa->_field |= value; \
+ if (svm->vcpu.arch.vmsa_encrypted) { \
+ SEV_ES_SET_VALID(vmsa, _field); \
+ } \
}

#define DEFINE_VMSA_ACCESSOR(_field) \
@@ -629,6 +673,25 @@ DEFINE_VMSA_ACCESSOR(last_excp_to)
DEFINE_VMSA_U8_ACCESSOR(cpl)
DEFINE_VMSA_ACCESSOR(rip)
DEFINE_VMSA_ACCESSOR(rax)
+DEFINE_VMSA_ACCESSOR(rbx)
+DEFINE_VMSA_ACCESSOR(rcx)
+DEFINE_VMSA_ACCESSOR(rdx)
DEFINE_VMSA_ACCESSOR(rsp)
+DEFINE_VMSA_ACCESSOR(rbp)
+DEFINE_VMSA_ACCESSOR(rsi)
+DEFINE_VMSA_ACCESSOR(rdi)
+DEFINE_VMSA_ACCESSOR(r8)
+DEFINE_VMSA_ACCESSOR(r9)
+DEFINE_VMSA_ACCESSOR(r10)
+DEFINE_VMSA_ACCESSOR(r11)
+DEFINE_VMSA_ACCESSOR(r12)
+DEFINE_VMSA_ACCESSOR(r13)
+DEFINE_VMSA_ACCESSOR(r14)
+DEFINE_VMSA_ACCESSOR(r15)
+DEFINE_VMSA_ACCESSOR(sw_exit_code)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_1)
+DEFINE_VMSA_ACCESSOR(sw_exit_info_2)
+DEFINE_VMSA_ACCESSOR(sw_scratch)
+DEFINE_VMSA_ACCESSOR(xcr0)

#endif
--
2.28.0