On Tue, Jan 23, 2024 at 06:41:59PM -0800, Yang Weijiang wrote:
Set up CET MSRs, related VM_ENTRY/EXIT control bits and the fixed CR4 setting
to enable CET for nested VMs.

vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
to resume L2, so that the correct CET state is observed by each of them.
Suggested-by: Chao Gao <chao.gao@xxxxxxxxx>
Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
---
arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++--
arch/x86/kvm/vmx/vmcs12.c | 6 +++++
arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++-
arch/x86/kvm/vmx/vmx.c | 2 ++
4 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 468a7cf75035..e330897a7e5e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+ /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_U_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_S_CET, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL0_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL1_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL2_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PL3_SSP, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
+
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
vmx->nested.force_msr_bitmap_recalc = false;
@@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
+ vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
+ vmcs_writel(GUEST_INTR_SSP_TABLE,
+ vmcs12->guest_ssp_tbl);
+ }
+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
+ vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
+ }
I think you need to move this hunk outside the outermost if-statement, i.e.,

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {

otherwise, the whole block may be skipped (e.g., when evmcs is enabled and
GUEST_GRP1 is clean), leaving the CET state not context-switched.
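Something along these lines (untested, purely to illustrate the placement I
have in mind; all identifiers below come from your patch and the existing
prepare_vmcs02_rare() code):

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
		...
		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
	}

	/* Not guarded by the eVMCS clean-field check above. */
	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
		if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
			vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
			vmcs_writel(GUEST_INTR_SSP_TABLE, vmcs12->guest_ssp_tbl);
		}
		if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
		    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
			vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
	}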
And if VM_ENTRY_LOAD_CET_STATE of vmcs12 is cleared, L1's CET state should be
propagated to vmcs02 on nested VM-entry; see pre_vmenter_debugctl in struct
nested_vmx. I believe we need similar handling for the three CET fields.
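Roughly (a sketch only, modeled on the existing pre_vmenter_debugctl /
pre_vmenter_bndcfgs handling; the pre_vmenter_* CET field names below are
made up, and the SHSTK/IBT feature checks are omitted for brevity):

	/* New fields in struct nested_vmx, mirroring pre_vmenter_debugctl. */
	u64 pre_vmenter_s_cet;
	u64 pre_vmenter_ssp;
	u64 pre_vmenter_ssp_tbl;

	/*
	 * In nested_vmx_enter_non_root_mode(), before switching to vmcs02:
	 * snapshot L1's CET state when L1 does not load CET state on entry.
	 */
	if (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) {
		vmx->nested.pre_vmenter_s_cet = vmcs_readl(GUEST_S_CET);
		vmx->nested.pre_vmenter_ssp = vmcs_readl(GUEST_SSP);
		vmx->nested.pre_vmenter_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
	}

	/* In prepare_vmcs02(): carry L1's values into vmcs02 in that case. */
	if (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) {
		vmcs_writel(GUEST_S_CET, vmx->nested.pre_vmenter_s_cet);
		vmcs_writel(GUEST_SSP, vmx->nested.pre_vmenter_ssp);
		vmcs_writel(GUEST_INTR_SSP_TABLE, vmx->nested.pre_vmenter_ssp_tbl);
	}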
}
if (nested_cpu_has_xsaves(vmcs12))
@@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
+ vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
+ vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
+ }
+ if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
+ guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
+ vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
+ }

unnecessary braces.
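i.e., per kernel coding style a single statement doesn't need braces, just:

	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
	    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
		vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);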