[PATCH Part2 RFC v3 37/37] KVM: SVM: Support SEV-SNP AP Creation NAE event

From: Brijesh Singh
Date: Wed Jun 02 2021 - 10:17:49 EST


From: Tom Lendacky <thomas.lendacky@xxxxxxx>

Add support for the SEV-SNP AP Creation NAE event. This allows SEV-SNP
guests to create and start APs on their own.

A new event, KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, is created and used
so as to avoid updating the VMSA pointer while the vCPU is running.

For CREATE:
The guest supplies the GPA of the VMSA to be used for the vCPU with the
specified APIC ID. The GPA is saved in the svm struct of the target
vCPU, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added to the
vCPU and then the vCPU is kicked.

For CREATE_ON_INIT:
The guest supplies the GPA of the VMSA to be used for the vCPU with the
specified APIC ID the next time an INIT is performed. The GPA is saved
in the svm struct of the target vCPU.

For DESTROY:
The guest indicates it wishes to stop the vCPU. The GPA is cleared from
the svm struct, the KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event is added
to the vCPU and then the vCPU is kicked.


The KVM_REQ_UPDATE_PROTECTED_GUEST_STATE event handler will be invoked as
a result of the event or as a result of an INIT. The handler sets the vCPU
to the KVM_MP_STATE_UNINITIALIZED state, so that any errors will leave the
vCPU as not runnable. Any previous VMSA pages that were installed as
part of an SEV-SNP AP Creation NAE event are un-pinned. If a new VMSA is
to be installed, the VMSA guest page is pinned and set as the VMSA in the
vCPU VMCB and the vCPU state is set to KVM_MP_STATE_RUNNABLE. If a new
VMSA is not to be installed, the VMSA is cleared in the vCPU VMCB and the
vCPU state is left as KVM_MP_STATE_UNINITIALIZED to prevent it from being
run.

Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/include/asm/svm.h | 6 ++
arch/x86/kvm/svm/sev.c | 128 ++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 7 +-
arch/x86/kvm/svm/svm.h | 8 +-
arch/x86/kvm/x86.c | 11 ++-
6 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 117e2e08d7ed..881e05b3f74e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -91,6 +91,7 @@
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(31)

#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1402,6 +1403,8 @@ struct kvm_x86_ops {

int (*handle_rmp_page_fault)(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
int level, u64 error_code);
+
+ void (*update_protected_guest_state)(struct kvm_vcpu *vcpu);
};

struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 65407b6d35a0..82242d871a88 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -213,6 +213,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)

#define SVM_SEV_FEATURES_SNP_ACTIVE BIT(0)
+#define SVM_SEV_FEATURES_RESTRICTED_INJECTION BIT(3)
+#define SVM_SEV_FEATURES_ALTERNATE_INJECTION BIT(4)
+
+#define SVM_SEV_FEATURES_INT_INJ_MODES \
+ (SVM_SEV_FEATURES_RESTRICTED_INJECTION | \
+ SVM_SEV_FEATURES_ALTERNATE_INJECTION)

struct vmcb_seg {
u16 selector;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 047f4dbde99b..05292985c49d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2653,6 +2653,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!ghcb_sw_scratch_is_valid(ghcb))
goto vmgexit_err;
break;
+ case SVM_VMGEXIT_AP_CREATION:
+ if (!ghcb_rax_is_valid(ghcb))
+ goto vmgexit_err;
+ break;
case SVM_VMGEXIT_NMI_COMPLETE:
case SVM_VMGEXIT_AP_HLT_LOOP:
case SVM_VMGEXIT_AP_JUMP_TABLE:
@@ -3260,6 +3264,123 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
return ret;
}

+void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ kvm_pfn_t pfn;
+
+ mutex_lock(&svm->snp_vmsa_mutex);
+
+ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+ /* Clear use of the VMSA in the sev_es_init_vmcb() path */
+ svm->vmsa_pa = 0;
+
+ /* Clear use of the VMSA from the VMCB */
+ svm->vmcb->control.vmsa_pa = 0;
+
+ /* Un-pin previous VMSA */
+ if (svm->snp_vmsa_pfn) {
+ kvm_release_pfn_dirty(svm->snp_vmsa_pfn);
+ svm->snp_vmsa_pfn = 0;
+ }
+
+ if (svm->snp_vmsa_gpa) {
+ /* Validate that the GPA is page aligned */
+ if (!PAGE_ALIGNED(svm->snp_vmsa_gpa))
+ goto e_unlock;
+
+ /*
+ * The VMSA is referenced by the hypervisor physical address,
+ * so retrieve the PFN and pin it.
+ */
+ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(svm->snp_vmsa_gpa));
+ if (is_error_pfn(pfn))
+ goto e_unlock;
+
+ svm->snp_vmsa_pfn = pfn;
+
+ /* Use the new VMSA in the sev_es_init_vmcb() path */
+ svm->vmsa_pa = pfn_to_hpa(pfn);
+ svm->vmcb->control.vmsa_pa = svm->vmsa_pa;
+
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ } else {
+ vcpu->arch.pv.pv_unhalted = false;
+ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+ }
+
+e_unlock:
+ mutex_unlock(&svm->snp_vmsa_mutex);
+}
+
+static void sev_snp_ap_creation(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_vcpu *target_vcpu;
+ struct vcpu_svm *target_svm;
+ unsigned int request;
+ unsigned int apic_id;
+ u64 sev_features;
+ u64 int_inj_mode;
+ bool kick;
+
+ request = lower_32_bits(svm->vmcb->control.exit_info_1);
+ apic_id = upper_32_bits(svm->vmcb->control.exit_info_1);
+
+ /* Validate the APIC ID */
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, apic_id);
+ if (!target_vcpu)
+ return;
+
+ target_svm = to_svm(target_vcpu);
+
+ /*
+ * Restricted Injection mode and Alternate Injection mode are
+ * not supported.
+ */
+ sev_features = vcpu->arch.regs[VCPU_REGS_RAX];
+ int_inj_mode = sev_features & SVM_SEV_FEATURES_INT_INJ_MODES;
+
+ kick = true;
+
+ mutex_lock(&target_svm->snp_vmsa_mutex);
+
+ target_svm->snp_vmsa_gpa = 0;
+ target_svm->snp_vmsa_update_on_init = false;
+
+ switch (request) {
+ case SVM_VMGEXIT_AP_CREATE_ON_INIT:
+ if (!int_inj_mode) {
+ target_svm->snp_vmsa_update_on_init = true;
+ kick = false;
+ }
+ fallthrough;
+ case SVM_VMGEXIT_AP_CREATE:
+ if (!int_inj_mode) {
+ target_svm->snp_vmsa_gpa = svm->vmcb->control.exit_info_2;
+ break;
+ }
+
+ vcpu_unimpl(vcpu, "vmgexit: invalid AP injection mode [%#llx] from guest\n",
+ int_inj_mode);
+ break;
+ case SVM_VMGEXIT_AP_DESTROY:
+ break;
+ default:
+ vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n",
+ request);
+ break;
+ }
+
+ mutex_unlock(&target_svm->snp_vmsa_mutex);
+
+ if (kick) {
+ kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu);
+ kvm_vcpu_kick(target_vcpu);
+ }
+}
+
int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3379,6 +3500,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = 1;
break;
}
+ case SVM_VMGEXIT_AP_CREATION:
+ sev_snp_ap_creation(svm);
+
+ ret = 1;
+ break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(vcpu,
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
@@ -3453,6 +3579,8 @@ void sev_es_create_vcpu(struct vcpu_svm *svm)
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
GHCB_VERSION_MIN,
sev_enc_bit));
+
+ mutex_init(&svm->snp_vmsa_mutex);
}

void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 74bc635c9608..078a569c85a8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1304,7 +1304,10 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
svm->spec_ctrl = 0;
svm->virt_spec_ctrl = 0;

- if (!init_event) {
+ if (init_event && svm->snp_vmsa_update_on_init) {
+ svm->snp_vmsa_update_on_init = false;
+ sev_snp_update_protected_guest_state(vcpu);
+ } else {
vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_reset_bsp(vcpu))
@@ -4588,6 +4591,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.write_page_begin = sev_snp_write_page_begin,

.handle_rmp_page_fault = snp_handle_rmp_page_fault,
+
+ .update_protected_guest_state = sev_snp_update_protected_guest_state,
};

static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 52fd3cf30ad9..abd3f3cec7cf 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -191,6 +191,11 @@ struct vcpu_svm {
bool guest_state_loaded;

u64 ghcb_registered_gpa;
+
+ struct mutex snp_vmsa_mutex;
+ gpa_t snp_vmsa_gpa;
+ kvm_pfn_t snp_vmsa_pfn;
+ bool snp_vmsa_update_on_init; /* SEV-SNP AP Creation on INIT-SIPI */
};

struct svm_cpu_data {
@@ -554,7 +559,7 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
#define GHCB_VERSION_MAX 2ULL
#define GHCB_VERSION_MIN 1ULL

-#define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP
+#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)

extern unsigned int max_sev_asid;

@@ -583,6 +588,7 @@ int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level);
void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
int snp_handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
int level, u64 error_code);
+void sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu);

/* vmenter.S */

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1398b8021982..e9fd59913bc2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9279,6 +9279,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu)) {
+ kvm_x86_ops.update_protected_guest_state(vcpu);
+ if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) {
+ r = 1;
+ goto out;
+ }
+ }
}

if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -11236,7 +11244,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (!list_empty_careful(&vcpu->async_pf.done))
return true;

- if (kvm_apic_has_events(vcpu))
+ if (kvm_apic_has_events(vcpu) ||
+ kvm_test_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, vcpu))
return true;

if (vcpu->arch.pv.pv_unhalted)
--
2.17.1