Re: [PATCH v2 10/25] KVM: VMX: Add support for FRED context save/restore

From: Chao Gao
Date: Mon Apr 29 2024 - 02:33:19 EST


On Thu, Feb 08, 2024 at 01:26:30AM +0800, Xin Li wrote:
>Handle host initiated FRED MSR access requests to allow FRED context
>to be set/get from user level.
>

The changelog isn't accurate because guest accesses are also handled
by this patch, specifically in the "else" branch.

>+ if (host_initiated) {
>+ if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
>+ return 1;
>+ } else {



> void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>@@ -2019,6 +2037,33 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_KERNEL_GS_BASE:
> msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
> break;
>+ case MSR_IA32_FRED_RSP0:
>+ msr_info->data = vmx_read_guest_fred_rsp0(vmx);
>+ break;
>+ case MSR_IA32_FRED_RSP1:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP1);
>+ break;
>+ case MSR_IA32_FRED_RSP2:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP2);
>+ break;
>+ case MSR_IA32_FRED_RSP3:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP3);
>+ break;
>+ case MSR_IA32_FRED_STKLVLS:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
>+ break;
>+ case MSR_IA32_FRED_SSP1:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP1);
>+ break;
>+ case MSR_IA32_FRED_SSP2:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP2);
>+ break;
>+ case MSR_IA32_FRED_SSP3:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP3);
>+ break;
>+ case MSR_IA32_FRED_CONFIG:
>+ msr_info->data = vmcs_read64(GUEST_IA32_FRED_CONFIG);
>+ break;

How about adding a helper function to convert an MSR index to the
corresponding VMCS field ID? Then do:

case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
msr_info->data = vmcs_read64(msr_to_vmcs(index));
break;

and ...

> #endif
> case MSR_EFER:
> return kvm_get_msr_common(vcpu, msr_info);
>@@ -2226,6 +2271,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vmx_update_exception_bitmap(vcpu);
> }
> break;
>+ case MSR_IA32_FRED_RSP0:
>+ vmx_write_guest_fred_rsp0(vmx, data);
>+ break;
>+ case MSR_IA32_FRED_RSP1:
>+ vmcs_write64(GUEST_IA32_FRED_RSP1, data);
>+ break;
>+ case MSR_IA32_FRED_RSP2:
>+ vmcs_write64(GUEST_IA32_FRED_RSP2, data);
>+ break;
>+ case MSR_IA32_FRED_RSP3:
>+ vmcs_write64(GUEST_IA32_FRED_RSP3, data);
>+ break;
>+ case MSR_IA32_FRED_STKLVLS:
>+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, data);
>+ break;
>+ case MSR_IA32_FRED_SSP1:
>+ vmcs_write64(GUEST_IA32_FRED_SSP1, data);
>+ break;
>+ case MSR_IA32_FRED_SSP2:
>+ vmcs_write64(GUEST_IA32_FRED_SSP2, data);
>+ break;
>+ case MSR_IA32_FRED_SSP3:
>+ vmcs_write64(GUEST_IA32_FRED_SSP3, data);
>+ break;
>+ case MSR_IA32_FRED_CONFIG:
>+ vmcs_write64(GUEST_IA32_FRED_CONFIG, data);
>+ break;

case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
vmcs_write64(msr_to_vmcs(index), data);
break;

The code will be more compact and generate fewer instructions. I believe the
CET series can make the same change [*]. Performance here isn't critical; I
just think it looks cumbersome to repeat the same pattern for 8 (and more with
CET considered) MSRs.

[*]: https://lore.kernel.org/kvm/20240219074733.122080-21-weijiang.yang@xxxxxxxxx/

> #endif
> case MSR_IA32_SYSENTER_CS:
> if (is_guest_mode(vcpu))
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 363b1c080205..4e8d60f248e3 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -1451,6 +1451,9 @@ static const u32 msrs_to_save_base[] = {
> MSR_STAR,
> #ifdef CONFIG_X86_64
> MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
>+ MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
>+ MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
>+ MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
> #endif
> MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
> MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
>@@ -1892,6 +1895,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> return 1;
>
> data = (u32)data;
>+ break;
>+ case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
>+ if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
>+ return 1;
>+ if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
>+ (data & GENMASK_ULL(5, 0)))
>+ return 1;
>+ if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
>+ (data & GENMASK_ULL(2, 0)))
>+ return 1;
>+
>+ if (host_initiated) {
>+ if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
>+ return 1;

Should be:
if (!kvm_cpu_cap_has(X86_FEATURE_FRED) && data)

KVM's ABI allows userspace to write only 0 if guests cannot enumerate the
feature. Even better, your next version can be based on top of Sean's series:

https://lore.kernel.org/kvm/20240425181422.3250947-1-seanjc@xxxxxxxxxx/T/#md00be687770e1e658fc9fe0eac20b5f0bd230e4c

This way, you can get rid of the "host_initiated" check.