[PATCH v4 05/19] KVM: VMX: Initialize VMCS FRED fields

From: Xin Li (Intel)
Date: Fri Mar 28 2025 - 13:17:27 EST


From: Xin Li <xin3.li@xxxxxxxxx>

Initialize host VMCS FRED fields with host FRED MSRs' value and
guest VMCS FRED fields to 0.

FRED CPU state is managed in 9 new FRED MSRs:
IA32_FRED_CONFIG,
IA32_FRED_STKLVLS,
IA32_FRED_RSP0,
IA32_FRED_RSP1,
IA32_FRED_RSP2,
IA32_FRED_RSP3,
IA32_FRED_SSP1,
IA32_FRED_SSP2,
IA32_FRED_SSP3,
as well as a few existing CPU registers and MSRs:
CR4.FRED,
IA32_STAR,
IA32_KERNEL_GS_BASE,
IA32_PL0_SSP (also known as IA32_FRED_SSP0).

CR4, IA32_KERNEL_GS_BASE and IA32_STAR are already well managed.
Except IA32_FRED_RSP0 and IA32_FRED_SSP0, all other FRED CPU state
MSRs have corresponding VMCS fields in both the host-state and
guest-state areas. So KVM just needs to initialize them, and with
proper VM entry/exit FRED controls, a FRED CPU will keep tracking
host and guest FRED CPU state in VMCS automatically.

Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>
Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
Tested-by: Shan Kang <shan.kang@xxxxxxxxx>
---

Change in v4:
* Initialize host SSP[1-3] to 0s in vmx_set_constant_host_state()
because Linux doesn't support kernel shadow stacks (Chao Gao).

Change in v3:
* Use structure kvm_host_values to keep host fred config & stack levels
(Sean Christopherson).

Changes in v2:
* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() to decouple
KVM's capability to virtualize a feature and host's enabling of a
feature (Chao Gao).
* Move guest FRED state init into __vmx_vcpu_reset() (Chao Gao).
---
arch/x86/include/asm/vmx.h | 32 ++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.c | 36 ++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.h | 3 +++
3 files changed, 71 insertions(+)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 5598517617a5..8a2b097aadf2 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -289,12 +289,44 @@ enum vmcs_field {
GUEST_BNDCFGS_HIGH = 0x00002813,
GUEST_IA32_RTIT_CTL = 0x00002814,
GUEST_IA32_RTIT_CTL_HIGH = 0x00002815,
+ GUEST_IA32_FRED_CONFIG = 0x0000281a,
+ GUEST_IA32_FRED_CONFIG_HIGH = 0x0000281b,
+ GUEST_IA32_FRED_RSP1 = 0x0000281c,
+ GUEST_IA32_FRED_RSP1_HIGH = 0x0000281d,
+ GUEST_IA32_FRED_RSP2 = 0x0000281e,
+ GUEST_IA32_FRED_RSP2_HIGH = 0x0000281f,
+ GUEST_IA32_FRED_RSP3 = 0x00002820,
+ GUEST_IA32_FRED_RSP3_HIGH = 0x00002821,
+ GUEST_IA32_FRED_STKLVLS = 0x00002822,
+ GUEST_IA32_FRED_STKLVLS_HIGH = 0x00002823,
+ GUEST_IA32_FRED_SSP1 = 0x00002824,
+ GUEST_IA32_FRED_SSP1_HIGH = 0x00002825,
+ GUEST_IA32_FRED_SSP2 = 0x00002826,
+ GUEST_IA32_FRED_SSP2_HIGH = 0x00002827,
+ GUEST_IA32_FRED_SSP3 = 0x00002828,
+ GUEST_IA32_FRED_SSP3_HIGH = 0x00002829,
HOST_IA32_PAT = 0x00002c00,
HOST_IA32_PAT_HIGH = 0x00002c01,
HOST_IA32_EFER = 0x00002c02,
HOST_IA32_EFER_HIGH = 0x00002c03,
HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ HOST_IA32_FRED_CONFIG = 0x00002c08,
+ HOST_IA32_FRED_CONFIG_HIGH = 0x00002c09,
+ HOST_IA32_FRED_RSP1 = 0x00002c0a,
+ HOST_IA32_FRED_RSP1_HIGH = 0x00002c0b,
+ HOST_IA32_FRED_RSP2 = 0x00002c0c,
+ HOST_IA32_FRED_RSP2_HIGH = 0x00002c0d,
+ HOST_IA32_FRED_RSP3 = 0x00002c0e,
+ HOST_IA32_FRED_RSP3_HIGH = 0x00002c0f,
+ HOST_IA32_FRED_STKLVLS = 0x00002c10,
+ HOST_IA32_FRED_STKLVLS_HIGH = 0x00002c11,
+ HOST_IA32_FRED_SSP1 = 0x00002c12,
+ HOST_IA32_FRED_SSP1_HIGH = 0x00002c13,
+ HOST_IA32_FRED_SSP2 = 0x00002c14,
+ HOST_IA32_FRED_SSP2_HIGH = 0x00002c15,
+ HOST_IA32_FRED_SSP3 = 0x00002c16,
+ HOST_IA32_FRED_SSP3_HIGH = 0x00002c17,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab84939ace96..ac6aa2d091c3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1505,6 +1505,15 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
(unsigned long)(cpu_entry_stack(cpu) + 1));
}

+ /* Per-CPU FRED MSRs */
+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+#ifdef CONFIG_X86_64
+ vmcs_write64(HOST_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
+ vmcs_write64(HOST_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
+ vmcs_write64(HOST_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+#endif
+ }
+
vmx->loaded_vmcs->cpu = cpu;
}
}
@@ -4388,6 +4397,17 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
*/
vmcs_write16(HOST_DS_SELECTOR, 0);
vmcs_write16(HOST_ES_SELECTOR, 0);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+ /* FRED CONFIG and STKLVLS are the same on all CPUs */
+ vmcs_write64(HOST_IA32_FRED_CONFIG, kvm_host.fred_config);
+ vmcs_write64(HOST_IA32_FRED_STKLVLS, kvm_host.fred_stklvls);
+
+ /* Linux doesn't support kernel shadow stacks, thus SSPs are 0s */
+ vmcs_write64(HOST_IA32_FRED_SSP1, 0);
+ vmcs_write64(HOST_IA32_FRED_SSP2, 0);
+ vmcs_write64(HOST_IA32_FRED_SSP3, 0);
+ }
#else
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
@@ -4889,6 +4909,17 @@ static void init_vmcs(struct vcpu_vmx *vmx)
}

vmx_setup_uret_msrs(vmx);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+ vmcs_write64(GUEST_IA32_FRED_CONFIG, 0);
+ vmcs_write64(GUEST_IA32_FRED_RSP1, 0);
+ vmcs_write64(GUEST_IA32_FRED_RSP2, 0);
+ vmcs_write64(GUEST_IA32_FRED_RSP3, 0);
+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0);
+ vmcs_write64(GUEST_IA32_FRED_SSP1, 0);
+ vmcs_write64(GUEST_IA32_FRED_SSP2, 0);
+ vmcs_write64(GUEST_IA32_FRED_SSP3, 0);
+ }
}

static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -8705,6 +8736,11 @@ __init int vmx_hardware_setup(void)

kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);

+ if (kvm_cpu_cap_has(X86_FEATURE_FRED)) {
+ rdmsrl(MSR_IA32_FRED_CONFIG, kvm_host.fred_config);
+ rdmsrl(MSR_IA32_FRED_STKLVLS, kvm_host.fred_stklvls);
+ }
+
return r;
}

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9dc32a409076..02514f5b9c0b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -46,6 +46,9 @@ struct kvm_host_values {
u64 xcr0;
u64 xss;
u64 arch_capabilities;
+
+ u64 fred_config;
+ u64 fred_stklvls;
};

void kvm_spurious_fault(void);
--
2.48.1