Re: [PATCH v19 079/130] KVM: TDX: vcpu_run: save/restore host state(host kernel gs)

From: Binbin Wu
Date: Sat Apr 06 2024 - 23:03:13 EST




On 2/26/2024 4:26 PM, isaku.yamahata@xxxxxxxxx wrote:
From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>

On entering/exiting TDX vcpu, Preserved or clobbered CPU state is different
from VMX case.

Could you add more descriptions about the differences?

Add TDX hooks to save/restore host/guest CPU state.

KVM doesn't save/restore guest CPU state for TDX.

Save/restore kernel GS base MSR.

Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/vmx/main.c | 30 +++++++++++++++++++++++++--
arch/x86/kvm/vmx/tdx.c | 42 ++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/tdx.h | 4 ++++
arch/x86/kvm/vmx/x86_ops.h | 4 ++++
4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d72651ce99ac..8275a242ce07 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -158,6 +158,32 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx_vcpu_reset(vcpu, init_event);
}
+static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ /*
+ * All host state is saved/restored across SEAMCALL/SEAMRET,

It sounds confusing to me.
If all host states are saved/restored across SEAMCALL/SEAMRET, why this patch saves/restores MSR_KERNEL_GS_BASE for host?

and the
+ * guest state of a TD is obviously off limits. Deferring MSRs and DRs
+ * is pointless because the TDX module needs to load *something* so as
+ * not to expose guest state.
+ */
+ if (is_td_vcpu(vcpu)) {
+ tdx_prepare_switch_to_guest(vcpu);
+ return;
+ }
+
+ vmx_prepare_switch_to_guest(vcpu);
+}
+
+static void vt_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ if (is_td_vcpu(vcpu)) {
+ tdx_vcpu_put(vcpu);
+ return;
+ }
+
+ vmx_vcpu_put(vcpu);
+}
+
static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
if (is_td_vcpu(vcpu))
@@ -326,9 +352,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vcpu_free = vt_vcpu_free,
.vcpu_reset = vt_vcpu_reset,
- .prepare_switch_to_guest = vmx_prepare_switch_to_guest,
+ .prepare_switch_to_guest = vt_prepare_switch_to_guest,
.vcpu_load = vmx_vcpu_load,
- .vcpu_put = vmx_vcpu_put,
+ .vcpu_put = vt_vcpu_put,
.update_exception_bitmap = vmx_update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index fdf9196cb592..9616b1aab6ce 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/cpu.h>
+#include <linux/mmu_context.h>
#include <asm/tdx.h>
@@ -423,6 +424,7 @@ u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
int tdx_vcpu_create(struct kvm_vcpu *vcpu)
{
struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
WARN_ON_ONCE(vcpu->arch.cpuid_entries);
WARN_ON_ONCE(vcpu->arch.cpuid_nent);
@@ -446,9 +448,47 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
vcpu->arch.xfd_no_write_intercept = true;
+ tdx->host_state_need_save = true;
+ tdx->host_state_need_restore = false;
+
return 0;
}
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)

Just like vmx_prepare_switch_to_host(), the input can be "struct vcpu_tdx *", since vcpu is not used inside the function.
And the callsites just use "to_tdx(vcpu)"

+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
Then, this can be dropped.

+
+ if (!tdx->host_state_need_save)
+ return;
+
+ if (likely(is_64bit_mm(current->mm)))
+ tdx->msr_host_kernel_gs_base = current->thread.gsbase;
+ else
+ tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+
+ tdx->host_state_need_save = false;
+}
+
+static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)

ditto

+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+ tdx->host_state_need_save = true;
+ if (!tdx->host_state_need_restore)
+ return;
+
+ ++vcpu->stat.host_state_reload;
+
+ wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
+ tdx->host_state_need_restore = false;
+}
+
+void tdx_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vmx_vcpu_pi_put(vcpu);
+ tdx_prepare_switch_to_host(vcpu);
+}
+
void tdx_vcpu_free(struct kvm_vcpu *vcpu)
{
struct vcpu_tdx *tdx = to_tdx(vcpu);
@@ -569,6 +609,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
tdx_vcpu_enter_exit(tdx);
+ tdx->host_state_need_restore = true;
+
vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
trace_kvm_exit(vcpu, KVM_ISA_VMX);
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 81d301fbe638..e96c416e73bf 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -69,6 +69,10 @@ struct vcpu_tdx {
bool initialized;
+ bool host_state_need_save;
+ bool host_state_need_restore;
+ u64 msr_host_kernel_gs_base;
+
/*
* Dummy to make pmu_intel not corrupt memory.
* TODO: Support PMU for TDX. Future work.
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 3e29a6fe28ef..9fd997c79c33 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -151,6 +151,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu);
void tdx_vcpu_free(struct kvm_vcpu *vcpu);
void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu);
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void tdx_vcpu_put(struct kvm_vcpu *vcpu);
u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
@@ -186,6 +188,8 @@ static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu) { return EXIT_FASTPATH_NONE; }
+static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
+static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }