Re: [PATCH v5 27/34] KVM: SVM: Add support for booting APs for an SEV-ES guest

From: Paolo Bonzini
Date: Mon Dec 14 2020 - 11:05:29 EST


On 10/12/20 18:10, Tom Lendacky wrote:
From: Tom Lendacky <thomas.lendacky@xxxxxxx>

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
support. It is up to the guest to transfer control of the AP to the
proper location.

Subsequent AP boot:
KVM will expect to receive an AP Reset Hold exit event indicating that
the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
awaken it. When the AP Reset Hold exit event is received, KVM will place
the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
sequence, KVM will make the vCPU runnable. It is again up to the guest
to then transfer control of the AP to the proper location.

The GHCB specification also requires the hypervisor to save the address of
an AP Jump Table so that, for example, vCPUs that have been parked by UEFI
can be started by the OS. Provide support for the AP Jump Table set/get
exit code.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/svm/sev.c | 50 +++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 7 +++++
arch/x86/kvm/svm/svm.h | 3 ++
arch/x86/kvm/x86.c | 9 ++++++
5 files changed, 71 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 048b08437c33..60a3b9d33407 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1286,6 +1286,8 @@ struct kvm_x86_ops {
void (*migrate_timers)(struct kvm_vcpu *vcpu);
void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+
+ void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a7531de760b5..b47285384b1f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,6 +17,8 @@
#include <linux/processor.h>
#include <linux/trace_events.h>
+#include <asm/trapnr.h>
+
#include "x86.h"
#include "svm.h"
#include "cpuid.h"
@@ -1449,6 +1451,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!ghcb_sw_scratch_is_valid(ghcb))
goto vmgexit_err;
break;
+ case SVM_VMGEXIT_AP_HLT_LOOP:
+ case SVM_VMGEXIT_AP_JUMP_TABLE:
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
default:
@@ -1770,6 +1774,35 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
control->exit_info_2,
svm->ghcb_sa);
break;
+ case SVM_VMGEXIT_AP_HLT_LOOP:
+ svm->ap_hlt_loop = true;

This value needs to be communicated to userspace. Let's get this right from the beginning and use a new KVM_MP_STATE_* value instead (perhaps reuse KVM_MP_STATE_STOPPED but for x86 #define it as KVM_MP_STATE_AP_HOLD_RECEIVED?).

@@ -68,6 +68,7 @@ struct kvm_sev_info {
int fd; /* SEV device fd */
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
+ u64 ap_jump_table; /* SEV-ES AP Jump Table address */

Do you have any plans for migration of this value? How does the guest ensure that the hypervisor does not screw with it?

Paolo

+ ret = kvm_emulate_halt(&svm->vcpu);
+ break;
+ case SVM_VMGEXIT_AP_JUMP_TABLE: {
+ struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+ switch (control->exit_info_1) {
+ case 0:
+ /* Set AP jump table address */
+ sev->ap_jump_table = control->exit_info_2;
+ break;
+ case 1:
+ /* Get AP jump table address */
+ ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+ break;
+ default:
+ pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+ control->exit_info_1);
+ ghcb_set_sw_exit_info_1(ghcb, 1);
+ ghcb_set_sw_exit_info_2(ghcb,
+ X86_TRAP_UD |
+ SVM_EVTINJ_TYPE_EXEPT |
+ SVM_EVTINJ_VALID);
+ }
+
+ ret = 1;
+ break;
+ }
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(&svm->vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
@@ -1790,3 +1823,20 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
return kvm_sev_es_string_io(&svm->vcpu, size, port,
svm->ghcb_sa, svm->ghcb_sa_len, in);
}
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /* First SIPI: Use the values as initially set by the VMM */
+ if (!svm->ap_hlt_loop)
+ return;
+
+ /*
+ * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+ * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+ * non-zero value.
+ */
+ ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+ svm->ap_hlt_loop = false;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8d22ae25a0f8..2dbc20701ef5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4400,6 +4400,11 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
(vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
}
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
static void svm_vm_destroy(struct kvm *kvm)
{
avic_vm_destroy(kvm);
@@ -4541,6 +4546,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
.msr_filter_changed = svm_msr_filter_changed,
+
+ .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index b3f03dede6ac..5d570d5a6a2c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -68,6 +68,7 @@ struct kvm_sev_info {
int fd; /* SEV device fd */
unsigned long pages_locked; /* Number of pages locked */
struct list_head regions_list; /* List of registered regions */
+ u64 ap_jump_table; /* SEV-ES AP Jump Table address */
};
struct kvm_svm {
@@ -174,6 +175,7 @@ struct vcpu_svm {
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
+ bool ap_hlt_loop;
/* SEV-ES scratch area support */
void *ghcb_sa;
@@ -574,5 +576,6 @@ void sev_hardware_teardown(void);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
int sev_handle_vmgexit(struct vcpu_svm *svm);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ddd614a76744..4fd216b61a89 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10144,6 +10144,15 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
struct kvm_segment cs;
+ /*
+ * Guests with protected state can't have their state altered by KVM,
+ * call the vcpu_deliver_sipi_vector() x86 op for processing.
+ */
+ if (vcpu->arch.guest_state_protected) {
+ kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, vector);
+ return;
+ }
+
kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs.selector = vector << 8;
cs.base = vector << 12;