[PATCH Part2 v6 35/49] KVM: SVM: Remove the long-lived GHCB host map

From: Ashish Kalra
Date: Mon Jun 20 2022 - 19:15:38 EST


From: Brijesh Singh <brijesh.singh@xxxxxxx>

On VMGEXIT, sev_handle_vmgexit() creates a host mapping for the GHCB GPA,
and unmaps it just before VM-entry. This long-lived GHCB map is used by
the VMGEXIT handler through accessors such as ghcb_{set_get}_xxx().

A long-lived GHCB map can cause issue when SEV-SNP is enabled. When
SEV-SNP is enabled the mapped GPA needs to be protected against a page
state change.

To eliminate the long-lived GHCB mapping, update the GHCB sync operations
to explicitly map the GHCB before access and unmap it after access is
complete. This requires that the setting of the GHCBs sw_exit_info_{1,2}
fields be done during sev_es_sync_to_ghcb(), so create two new fields in
the vcpu_svm struct to hold these values when required to be set outside
of the GHCB mapping.

Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
---
arch/x86/kvm/svm/sev.c | 131 ++++++++++++++++++++++++++---------------
arch/x86/kvm/svm/svm.c | 12 ++--
arch/x86/kvm/svm/svm.h | 24 +++++++-
3 files changed, 111 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 01ea257e17d6..c70f3f7e06a8 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2823,15 +2823,40 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
kvfree(svm->sev_es.ghcb_sa);
}

+static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+{
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ u64 gfn = gpa_to_gfn(control->ghcb_gpa);
+
+ if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
+ /* Unable to map GHCB from guest */
+ pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+{
+ kvm_vcpu_unmap(&svm->vcpu, map, true);
+}
+
static void dump_ghcb(struct vcpu_svm *svm)
{
- struct ghcb *ghcb = svm->sev_es.ghcb;
+ struct kvm_host_map map;
unsigned int nbits;
+ struct ghcb *ghcb;
+
+ if (svm_map_ghcb(svm, &map))
+ return;
+
+ ghcb = map.hva;

/* Re-use the dump_invalid_vmcb module parameter */
if (!dump_invalid_vmcb) {
pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
- return;
+ goto e_unmap;
}

nbits = sizeof(ghcb->save.valid_bitmap) * 8;
@@ -2846,12 +2871,21 @@ static void dump_ghcb(struct vcpu_svm *svm)
pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+
+e_unmap:
+ svm_unmap_ghcb(svm, &map);
}

-static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->sev_es.ghcb;
+ struct kvm_host_map map;
+ struct ghcb *ghcb;
+
+ if (svm_map_ghcb(svm, &map))
+ return false;
+
+ ghcb = map.hva;

/*
* The GHCB protocol so far allows for the following data
@@ -2865,13 +2899,24 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+
+ /*
+ * Copy the return values from the exit_info_{1,2}.
+ */
+ ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2);
+
+ trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
+
+ svm_unmap_ghcb(svm, &map);
+
+ return true;
}

-static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->sev_es.ghcb;
u64 exit_code;

/*
@@ -2915,20 +2960,25 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

-static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct kvm_host_map map;
struct ghcb *ghcb;
- u64 exit_code;
u64 reason;

- ghcb = svm->sev_es.ghcb;
+ if (svm_map_ghcb(svm, &map))
+ return -EFAULT;
+
+ ghcb = map.hva;
+
+ trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);

/*
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ *exit_code = ghcb_get_sw_exit_code(ghcb);

/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage) {
@@ -3021,6 +3071,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
goto vmgexit_err;
}

+ sev_es_sync_from_ghcb(svm, ghcb);
+
+ svm_unmap_ghcb(svm, &map);
return 0;

vmgexit_err:
@@ -3031,10 +3084,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
ghcb->ghcb_usage);
} else if (reason == GHCB_ERR_INVALID_EVENT) {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
- exit_code);
+ *exit_code);
} else {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
- exit_code);
+ *exit_code);
dump_ghcb(svm);
}

@@ -3044,6 +3097,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
ghcb_set_sw_exit_info_1(ghcb, 2);
ghcb_set_sw_exit_info_2(ghcb, reason);

+ svm_unmap_ghcb(svm, &map);
+
/* Resume the guest to "return" the error code. */
return 1;
}
@@ -3053,23 +3108,20 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
/* Clear any indication that the vCPU is in a type of AP Reset Hold */
svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE;

- if (!svm->sev_es.ghcb)
+ if (!svm->sev_es.ghcb_in_use)
return;

/* Sync the scratch buffer area. */
if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
- ghcb_get_sw_scratch(svm->sev_es.ghcb),
+ svm->sev_es.ghcb_sa_gpa,
svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
svm->sev_es.ghcb_sa_sync = false;
}

- trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
-
sev_es_sync_to_ghcb(svm);

- kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
- svm->sev_es.ghcb = NULL;
+ svm->sev_es.ghcb_in_use = false;
}

void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -3099,7 +3151,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
- struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;

@@ -3178,8 +3229,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return 0;

e_scratch:
- ghcb_set_sw_exit_info_1(ghcb, 2);
- ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+ svm_set_ghcb_sw_exit_info_1(&svm->vcpu, 2);
+ svm_set_ghcb_sw_exit_info_2(&svm->vcpu, GHCB_ERR_INVALID_SCRATCH_AREA);

return 1;
}
@@ -3316,7 +3367,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
u64 ghcb_gpa, exit_code;
- struct ghcb *ghcb;
int ret;

/* Validate the GHCB */
@@ -3331,29 +3381,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
return 1;
}

- if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
- /* Unable to map GHCB from guest */
- vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
- ghcb_gpa);
-
- /* Without a GHCB, just return right back to the guest */
- return 1;
- }
-
- svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
- ghcb = svm->sev_es.ghcb_map.hva;
-
- trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
- exit_code = ghcb_get_sw_exit_code(ghcb);
-
- ret = sev_es_validate_vmgexit(svm);
+ ret = sev_es_validate_vmgexit(svm, &exit_code);
if (ret)
return ret;

- sev_es_sync_from_ghcb(svm);
- ghcb_set_sw_exit_info_1(ghcb, 0);
- ghcb_set_sw_exit_info_2(ghcb, 0);
+ svm->sev_es.ghcb_in_use = true;
+
+ svm_set_ghcb_sw_exit_info_1(vcpu, 0);
+ svm_set_ghcb_sw_exit_info_2(vcpu, 0);

switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
@@ -3393,20 +3428,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
break;
case 1:
/* Get AP jump table address */
- ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+ svm_set_ghcb_sw_exit_info_2(vcpu, sev->ap_jump_table);
break;
default:
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
control->exit_info_1);
- ghcb_set_sw_exit_info_1(ghcb, 2);
- ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+ svm_set_ghcb_sw_exit_info_1(vcpu, 2);
+ svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_ERR_INVALID_INPUT);
}

ret = 1;
break;
}
case SVM_VMGEXIT_HV_FEATURES: {
- ghcb_set_sw_exit_info_2(ghcb, GHCB_HV_FT_SUPPORTED);
+ svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_HV_FT_SUPPORTED);

ret = 1;
break;
@@ -3537,7 +3572,7 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* Return from an AP Reset Hold VMGEXIT, where the guest will
* set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
*/
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+ svm_set_ghcb_sw_exit_info_2(vcpu, 1);
break;
case AP_RESET_HOLD_MSR_PROTO:
/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 18e2cd4d9559..b24e0171cbf2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2720,14 +2720,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
+ if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb_in_use))
return kvm_complete_insn_gp(vcpu, err);

- ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
- ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
- X86_TRAP_GP |
- SVM_EVTINJ_TYPE_EXEPT |
- SVM_EVTINJ_VALID);
+ svm_set_ghcb_sw_exit_info_1(vcpu, 1);
+ svm_set_ghcb_sw_exit_info_2(vcpu,
+ X86_TRAP_GP |
+ SVM_EVTINJ_TYPE_EXEPT |
+ SVM_EVTINJ_VALID);
return 1;
}

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index bd0db4d4a61e..c80352c9c0d6 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -189,8 +189,7 @@ struct svm_nested_state {
struct vcpu_sev_es_state {
/* SEV-ES support */
struct sev_es_save_area *vmsa;
- struct ghcb *ghcb;
- struct kvm_host_map ghcb_map;
+ bool ghcb_in_use;
bool received_first_sipi;
unsigned int ap_reset_hold_type;

@@ -200,6 +199,13 @@ struct vcpu_sev_es_state {
u64 ghcb_sa_gpa;
u32 ghcb_sa_alloc_len;
bool ghcb_sa_sync;
+
+ /*
+ * SEV-ES support to hold the sw_exit_info return values to be
+ * sync'ed to the GHCB when mapped.
+ */
+ u64 ghcb_sw_exit_info_1;
+ u64 ghcb_sw_exit_info_2;
};

struct vcpu_svm {
@@ -614,6 +620,20 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);

+static inline void svm_set_ghcb_sw_exit_info_1(struct kvm_vcpu *vcpu, u64 val)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->sev_es.ghcb_sw_exit_info_1 = val;
+}
+
+static inline void svm_set_ghcb_sw_exit_info_2(struct kvm_vcpu *vcpu, u64 val)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->sev_es.ghcb_sw_exit_info_2 = val;
+}
+
extern struct kvm_x86_nested_ops svm_nested_ops;

/* avic.c */
--
2.25.1