[PATCH 4.4 41/88] KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC

From: Greg Kroah-Hartman
Date: Fri Dec 14 2018 - 07:19:15 EST


4.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: "Radim KrÄmÃÅ" <rkrcmar@xxxxxxxxxx>

commit d048c098218e91ed0e10dfa1f0f80e2567fe4ef7 upstream.

msr bitmap can be used to avoid a VM exit (interception) on guest MSR
accesses. In some configurations of VMX controls, the guest can even
directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED
APIC ACCESSES.

L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
To do so, L1 would first trick KVM to disable all possible interceptions
by enabling APICv features and then would turn those features off;
nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would
not intercept previously enabled MSRs even though they were not safe
with the new configuration.

Correctly re-enabling interceptions is not enough as a second bug would
still allow L1+L2 to access host's MSRs: msr bitmap was shared for all
VMCSs, so L1 could trigger a race to get the desired combination of msr
bitmap and VMX controls.

This fix allocates a msr bitmap for every L1 VCPU, allows only safe
x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would
have to intercept everything anyway.

Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap")
Reported-by: Jim Mattson <jmattson@xxxxxxxxxx>
Suggested-by: Wincy Van <fanwenyi0529@xxxxxxxxx>
Reviewed-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
Signed-off-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
[bwh: Backported to 4.4:
- handle_vmon() doesn't allocate a cached vmcs12
- Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/kvm/vmx.c | 96 ++++++++++++++++++++---------------------------------
1 file changed, 38 insertions(+), 58 deletions(-)

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -431,6 +431,8 @@ struct nested_vmx {
u16 posted_intr_nv;
u64 msr_ia32_feature_control;

+ unsigned long *msr_bitmap;
+
struct hrtimer preemption_timer;
bool preemption_timer_expired;

@@ -912,7 +914,6 @@ static unsigned long *vmx_msr_bitmap_leg
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;

@@ -2358,7 +2359,7 @@ static void vmx_set_msr_bitmap(struct kv
unsigned long *msr_bitmap;

if (is_guest_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_nested;
+ msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
@@ -6192,13 +6193,6 @@ static __init int hardware_setup(void)
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;

- if (nested) {
- vmx_msr_bitmap_nested =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_nested)
- goto out5;
- }
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
@@ -6216,8 +6210,6 @@ static __init int hardware_setup(void)

memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
- if (nested)
- memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);

if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
@@ -6354,9 +6346,6 @@ out8:
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6382,8 +6371,6 @@ static __exit void hardware_unsetup(void
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap);
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);

free_kvm_area();
}
@@ -6825,10 +6812,17 @@ static int handle_vmon(struct kvm_vcpu *
return 1;
}

+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx->nested.msr_bitmap)
+ goto out_msr_bitmap;
+ }
+
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs)
- return -ENOMEM;
+ goto out_shadow_vmcs;
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -6850,6 +6844,12 @@ static int handle_vmon(struct kvm_vcpu *
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
+
+out_shadow_vmcs:
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+ return -ENOMEM;
}

/*
@@ -6919,6 +6919,10 @@ static void free_nested(struct vcpu_vmx
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
+ if (vmx->nested.msr_bitmap) {
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+ vmx->nested.msr_bitmap = NULL;
+ }
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
/* Unpin physical memory we referred to in current vmcs02 */
@@ -9248,8 +9252,10 @@ static inline bool nested_vmx_merge_msr_
{
int msr;
struct page *page;
- unsigned long *msr_bitmap;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;

+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;

@@ -9258,58 +9264,32 @@ static inline bool nested_vmx_merge_msr_
WARN_ON(1);
return false;
}
- msr_bitmap = (unsigned long *)kmap(page);
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);

if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
msr, MSR_TYPE_R);
- /* TPR is allowed */
- nested_vmx_disable_intercept_for_msr(msr_bitmap,
- vmx_msr_bitmap_nested,
+
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W);
+
if (nested_cpu_has_vid(vmcs12)) {
- /* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
}
- } else {
- /*
- * Enable reading intercept of all the x2apic
- * MSRs. We should not rely on vmcs12 to do any
- * optimizations here, it may have been modified
- * by L1.
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- msr,
- MSR_TYPE_R);
-
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
}
kunmap(page);
nested_release_page_clean(page);
@@ -9729,10 +9709,10 @@ static void prepare_vmcs02(struct kvm_vc
}

if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS) {
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
- /* MSR_BITMAP will be set by following vmx_set_efer. */
- } else
+ exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+ else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;

/*