[PATCH DO NOT APPLY] KVM: SVM: Cache volatile task state to avoid VMSAVE in core run loop

From: Sean Christopherson
Date: Tue Apr 30 2024 - 18:44:44 EST


Do not apply. This code is not functional, and is probably a terrible
idea in the first place.

Not-signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---

Tom, this is the idea I was talking about at PUCK for avoiding VMSAVE when a
vCPU is scheduled back in. The idea is to:

1) Do VMSAVE during KVM_RUN's vcpu_load()
2) Snapshot the state that is per-task and manually shove that state into the
save area (via memory) during svm_prepare_switch_to_guest().
3) For state that is (theoretically) common to all tasks, e.g. the SYSENTER
entry point, rely on the initial VMSAVE.
4) Refresh the snapshot during kvm_sched_in() if the vCPU is being migrated to
a different pCPU to account for per-CPU state, e.g. GS.base.

QEMU segfaults on FS: access though, specifically if I omit the VMSAVE in the
sched_in path, so I'm missing something.

And this really was supposed to be for illustrative purposes only; I just
couldn't resist seeing if it would work :-)

arch/x86/kvm/svm/svm.c | 34 ++++++++++++++++++++++++----------
arch/x86/kvm/svm/svm.h | 5 +++++
2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6d9763dc4fed..348addf0d4cf 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1499,6 +1499,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+ struct vmcb_save_area *sa = page_address(sd->save_area);

if (sev_es_guest(vcpu->kvm))
sev_es_unmap_ghcb(svm);
@@ -1506,17 +1507,13 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
if (svm->guest_state_loaded)
return;

- /*
- * Save additional host state that will be restored on VMEXIT (sev-es)
- * or subsequent vmload of host save area.
- */
- vmsave(sd->save_area_pa);
- if (sev_es_guest(vcpu->kvm)) {
- struct sev_es_save_area *hostsa;
- hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
+ sa->ldtr = svm->saved_ldtr;
+ sa->tr = svm->saved_tr;
+ sa->fs = svm->saved_fs;
+ sa->kernel_gs_base = svm->saved_kernel_gs_base;

- sev_es_prepare_switch_to_guest(svm, hostsa);
- }
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_prepare_switch_to_guest(svm, (void *)sa + 0x400);

if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -1543,6 +1540,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool sched_in)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+ struct vmcb_save_area *sa = page_address(sd->save_area);

if (sched_in && !kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
@@ -1555,6 +1553,22 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu, bool sched_in)
}
if (kvm_vcpu_apicv_active(vcpu))
avic_vcpu_load(vcpu, cpu);
+
+ /*
+ * If the vCPU is being loaded for KVM_RUN, save additional host state
+ * that will be restored on VMEXIT (sev-es) or subsequent vmload of
+ * host save area. No need to re-save state if the vCPU task was
+ * scheduled out from within KVM_RUN and is being scheduled back in on
+ * the same pCPU.
+ */
+ if (vcpu->wants_to_run && (vcpu->cpu != cpu || !sched_in)) {
+ vmsave(sd->save_area_pa);
+
+ svm->saved_ldtr = sa->ldtr;
+ svm->saved_tr = sa->tr;
+ svm->saved_fs = sa->fs;
+ svm->saved_kernel_gs_base = sa->kernel_gs_base;
+ }
}

static void svm_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 323901782547..d0b93a69453a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -219,6 +219,11 @@ struct vcpu_svm {
u32 sysenter_eip_hi;
uint64_t tsc_aux;

+ struct vmcb_seg saved_ldtr;
+ struct vmcb_seg saved_tr;
+ struct vmcb_seg saved_fs;
+ u64 saved_kernel_gs_base;
+
u64 msr_decfg;

u64 next_rip;

base-commit: 662c1834094920e74ef48f6d52eba112094ae730
--
2.45.0.rc0.197.gbae5840b3b-goog