[RFC PATCH 27/35] KVM: SVM: Add support for booting APs for an SEV-ES guest

From: Tom Lendacky
Date: Mon Sep 14 2020 - 16:27:52 EST


From: Tom Lendacky <thomas.lendacky@xxxxxxx>

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.

Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.

The GHCB specification also requires the hypervisor to save the address of
an AP Jump Table so that, for example, vCPUs that have been parked by UEFI
can be started by the OS. Provide support for the AP Jump Table set/get
exit code.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/svm/sev.c | 48 +++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 7 +++++
arch/x86/kvm/svm/svm.h | 3 +++
arch/x86/kvm/x86.c | 9 +++++++
5 files changed, 69 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 790659494aae..003f257d2155 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1237,6 +1237,8 @@ struct kvm_x86_ops {
unsigned long val);

bool (*allow_debug)(struct kvm *kvm);
+
+ void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
};

struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index da1736d228a6..cbb5f1b191bb 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -16,6 +16,8 @@
#include <linux/swap.h>
#include <linux/trace_events.h>

+#include <asm/trapnr.h>
+
#include "x86.h"
#include "svm.h"
#include "trace.h"
@@ -1472,6 +1474,35 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
control->exit_info_2,
svm->ghcb_sa);
break;
+ case SVM_VMGEXIT_AP_HLT_LOOP:
+ svm->ap_hlt_loop = true;
+ ret = kvm_emulate_halt(&svm->vcpu);
+ break;
+ case SVM_VMGEXIT_AP_JUMP_TABLE: {
+ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+ switch (control->exit_info_1) {
+ case 0:
+ /* Set AP jump table address */
+ sev->ap_jump_table = control->exit_info_2;
+ break;
+ case 1:
+ /* Get AP jump table address */
+ ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+ break;
+ default:
+ pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+ control->exit_info_1);
+ ghcb_set_sw_exit_info_1(ghcb, 1);
+ ghcb_set_sw_exit_info_2(ghcb,
+ X86_TRAP_UD |
+ SVM_EVTINJ_TYPE_EXEPT |
+ SVM_EVTINJ_VALID);
+ }
+
+ ret = 1;
+ break;
+ }
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
pr_err("vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
control->exit_info_1,
@@ -1492,3 +1523,20 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
return kvm_sev_es_string_io(&svm->vcpu, size, port,
svm->ghcb_sa, svm->ghcb_sa_len, in);
}
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /* First SIPI: Use the the values as initially set by the VMM */
+ if (!svm->ap_hlt_loop)
+ return;
+
+ /*
+ * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+ * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+ * non-zero value.
+ */
+ ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+ svm->ap_hlt_loop = false;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 48699c41b62a..ce1707dc9464 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4343,6 +4343,11 @@ static bool svm_allow_debug(struct kvm *kvm)
return !sev_es_guest(kvm);
}

+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
static void svm_vm_destroy(struct kvm *kvm)
{
avic_vm_destroy(kvm);
@@ -4486,6 +4491,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.reg_write_override = svm_reg_write_override,

.allow_debug = svm_allow_debug,
+
+ .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};

static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9f1c8ed88c79..a0b226c90feb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -67,6 +67,7 @@ struct kvm_sev_info {
int fd; /* SEV device fd */
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
+ u64 ap_jump_table; /* SEV-ES AP Jump Table address */
};

struct kvm_svm {
@@ -165,6 +166,7 @@ struct vcpu_svm {
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
+ bool ap_hlt_loop;

/* SEV-ES scratch area support */
void *ghcb_sa;
@@ -565,6 +567,7 @@ void __init sev_hardware_setup(void);
void sev_hardware_teardown(void);
int sev_handle_vmgexit(struct vcpu_svm *svm);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);

/* VMSA Accessor functions */

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a53e24c1c5d1..23564d02d158 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9772,6 +9772,15 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
struct kvm_segment cs;

+ /*
+ * For SEV-ES, the register state can't be altered by KVM. If the VMSA
+ * is encrypted, call the vcpu_deliver_sipi_vector() x86 op.
+ */
+ if (vcpu->arch.vmsa_encrypted) {
+ kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, vector);
+ return;
+ }
+
kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs.selector = vector << 8;
cs.base = vector << 12;
--
2.28.0