Re: [PATCH 1/4] KVM: nVMX: support restore of VMX capability MSRs

From: Paolo Bonzini
Date: Wed Nov 23 2016 - 06:44:40 EST




On 23/11/2016 02:14, David Matlack wrote:
> The VMX capability MSRs advertise the set of features the KVM virtual
> CPU can support. This set of features vary across different host CPUs
> and KVM versions. This patch aims to addresses both sources of
> differences, allowing VMs to be migrated across CPUs and KVM versions
> without guest-visible changes to these MSRs. Note that cross-KVM-
> version migration is only supported from this point forward.
>
> When the VMX capability MSRs are restored, they are audited to check
> that the set of features advertised are a subset of what KVM and the
> CPU support.
>
> Since the VMX capability MSRs are read-only, they do not need to be on
> the default MSR save/restore lists. The userspace hypervisor can set
> the values of these MSRs or read them from KVM at VCPU creation time,
> and restore the same value after every save/restore.
>
> Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx>
> ---
> arch/x86/include/asm/vmx.h | 31 +++++
> arch/x86/kvm/vmx.c | 317 +++++++++++++++++++++++++++++++++++++++++----
> 2 files changed, 324 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index a002b07..a4ca897 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -25,6 +25,7 @@
> #define VMX_H
>
>
> +#include <linux/bitops.h>
> #include <linux/types.h>
> #include <uapi/asm/vmx.h>
>
> @@ -110,6 +111,36 @@
> #define VMX_MISC_SAVE_EFER_LMA 0x00000020
> #define VMX_MISC_ACTIVITY_HLT 0x00000040
>
> +static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
> +{
> + return vmx_basic & GENMASK_ULL(30, 0);
> +}
> +
> +static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
> +{
> + return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
> +}
> +
> +static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
> +{
> + return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
> +}
> +
> +static inline int vmx_misc_cr3_count(u64 vmx_misc)
> +{
> + return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
> +}
> +
> +static inline int vmx_misc_max_msr(u64 vmx_misc)
> +{
> + return (vmx_misc & GENMASK_ULL(27, 25)) >> 25;
> +}
> +
> +static inline int vmx_misc_mseg_revid(u64 vmx_misc)
> +{
> + return (vmx_misc & GENMASK_ULL(63, 32)) >> 32;
> +}
> +
> /* VMCS Encodings */
> enum vmcs_field {
> VIRTUAL_PROCESSOR_ID = 0x00000000,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5382b82..6ec3832 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -463,6 +463,12 @@ struct nested_vmx {
> u32 nested_vmx_misc_high;
> u32 nested_vmx_ept_caps;

> +/*
> + * Called when userspace is restoring VMX MSRs.
> + *
> + * Returns 0 on success, non-0 otherwise.
> + */
> +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> switch (msr_index) {
> case MSR_IA32_VMX_BASIC:
> + return vmx_restore_vmx_basic(vmx, data);
> + case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
> + case MSR_IA32_VMX_PINBASED_CTLS:
> + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
> + case MSR_IA32_VMX_PROCBASED_CTLS:
> + case MSR_IA32_VMX_TRUE_EXIT_CTLS:
> + case MSR_IA32_VMX_EXIT_CTLS:
> + case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
> + case MSR_IA32_VMX_ENTRY_CTLS:

PINBASED_CTLS, PROCBASED_CTLS, EXIT_CTLS and ENTRY_CTLS can be derived
from their "true" counterparts, so I think it's better to remove the
"non-true" ones from struct nested_vmx (and/or add the "true" ones when
missing) and make them entirely computed. But it can be done on top.

Paolo

> + case MSR_IA32_VMX_PROCBASED_CTLS2:
> + return vmx_restore_control_msr(vmx, msr_index, data);
> + case MSR_IA32_VMX_MISC:
> + return vmx_restore_vmx_misc(vmx, data);
> + case MSR_IA32_VMX_CR0_FIXED0:
> + case MSR_IA32_VMX_CR4_FIXED0:
> + return vmx_restore_fixed0_msr(vmx, msr_index, data);
> + case MSR_IA32_VMX_CR0_FIXED1:
> + case MSR_IA32_VMX_CR4_FIXED1:
> + return vmx_restore_fixed1_msr(vmx, msr_index, data);
> + case MSR_IA32_VMX_EPT_VPID_CAP:
> + return vmx_restore_vmx_ept_vpid_cap(vmx, data);
> + case MSR_IA32_VMX_VMCS_ENUM:
> + vmx->nested.nested_vmx_vmcs_enum = data;
> + return 0;
> + default:
> /*
> - * This MSR reports some information about VMX support. We
> - * should return information about the VMX we emulate for the
> - * guest, and the VMCS structure we give it - not about the
> - * VMX support of the underlying hardware.
> + * The rest of the VMX capability MSRs do not support restore.
> */
> - *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
> - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
> - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
> - if (cpu_has_vmx_basic_inout())
> - *pdata |= VMX_BASIC_INOUT;
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +/* Returns 0 on success, non-0 otherwise. */
> +static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + switch (msr_index) {
> + case MSR_IA32_VMX_BASIC:
> + *pdata = vmx->nested.nested_vmx_basic;
> break;
> case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
> case MSR_IA32_VMX_PINBASED_CTLS:
> @@ -2904,27 +3176,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
> vmx->nested.nested_vmx_misc_low,
> vmx->nested.nested_vmx_misc_high);
> break;
> - /*
> - * These MSRs specify bits which the guest must keep fixed (on or off)
> - * while L1 is in VMXON mode (in L1's root mode, or running an L2).
> - * We picked the standard core2 setting.
> - */
> -#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
> -#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
> case MSR_IA32_VMX_CR0_FIXED0:
> - *pdata = VMXON_CR0_ALWAYSON;
> + *pdata = vmx->nested.nested_vmx_cr0_fixed0;
> break;
> case MSR_IA32_VMX_CR0_FIXED1:
> - *pdata = -1ULL;
> + *pdata = vmx->nested.nested_vmx_cr0_fixed1;
> break;
> case MSR_IA32_VMX_CR4_FIXED0:
> - *pdata = VMXON_CR4_ALWAYSON;
> + *pdata = vmx->nested.nested_vmx_cr4_fixed0;
> break;
> case MSR_IA32_VMX_CR4_FIXED1:
> - *pdata = -1ULL;
> + *pdata = vmx->nested.nested_vmx_cr4_fixed1;
> break;
> case MSR_IA32_VMX_VMCS_ENUM:
> - *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
> + *pdata = vmx->nested.nested_vmx_vmcs_enum;
> break;
> case MSR_IA32_VMX_PROCBASED_CTLS2:
> *pdata = vmx_control_msr(
> @@ -3107,7 +3372,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vmx_leave_nested(vcpu);
> break;
> case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
> - return 1; /* they are read-only */
> + if (!msr_info->host_initiated)
> + return 1; /* they are read-only */
> + if (!nested_vmx_allowed(vcpu))
> + return 1;
> + return vmx_set_vmx_msr(vcpu, msr_index, data);
> case MSR_IA32_XSS:
> if (!vmx_xsaves_supported())
> return 1;
>