[PATCH 1/2] KVM: x86: disable MPX if host did not enable MPX XSAVE features

From: Paolo Bonzini
Date: Tue Mar 08 2016 - 06:45:21 EST


When eager FPU is disabled, KVM will still see the MPX bit in CPUID and
presumably the MPX vmentry and vmexit controls. However, it will not
be able to expose the MPX XSAVE features to the guest, because the guest's
accessible XSAVE features are always a subset of host_xcr0.

In this case, we should disable the MPX CPUID bit, the BNDCFGS MSR,
and the MPX vmentry and vmexit controls for nested virtualization.
It is then unnecessary to enable guest eager FPU if the guest has the
MPX CPUID bit set.

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
arch/x86/kvm/cpuid.c | 13 ++++++++++---
arch/x86/kvm/cpuid.h | 9 +--------
arch/x86/kvm/vmx.c | 13 ++++++-------
3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6525e926f566..fa241d4fda98 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -46,11 +46,18 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}

+bool kvm_mpx_supported(void)
+{
+ return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
+ && kvm_x86_ops->mpx_supported());
+}
+EXPORT_SYMBOL_GPL(kvm_mpx_supported);
+
u64 kvm_supported_xcr0(void)
{
u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;

- if (!kvm_x86_ops->mpx_supported())
+ if (!kvm_mpx_supported())
xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);

return xcr0;
@@ -97,7 +104,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

- vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
+ vcpu->arch.eager_fpu = use_eager_fpu();
if (vcpu->arch.eager_fpu)
kvm_x86_ops->fpu_activate(vcpu);

@@ -295,7 +302,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+ unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;

/* cpuid 1.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c8eda1498121..66a6581724ad 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
#include <asm/cpu.h>

int kvm_update_cpuid(struct kvm_vcpu *vcpu);
+bool kvm_mpx_supported(void);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -135,14 +136,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}

-static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_MPX));
-}
-
static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46154dac71e6..e512aa7ed874 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -861,7 +861,6 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
-static bool vmx_mpx_supported(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2595,7 +2594,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;

- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;

/* We support free control of debug control saving. */
@@ -2616,7 +2615,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VM_ENTRY_LOAD_IA32_PAT;
vmx->nested.nested_vmx_entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;

/* We support free control of debug control loading. */
@@ -2860,7 +2859,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -2937,7 +2936,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
@@ -3410,7 +3409,7 @@ static void init_vmcs_shadow_fields(void)
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
switch (shadow_read_write_fields[i]) {
case GUEST_BNDCFGS:
- if (!vmx_mpx_supported())
+ if (!kvm_mpx_supported())
continue;
break;
default:
@@ -10265,7 +10264,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
- if (vmx_mpx_supported())
+ if (kvm_mpx_supported())
vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
if (nested_cpu_has_xsaves(vmcs12))
vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
--
1.8.3.1