[PATCH v2 0/8] KVM: x86: expose CVE-2017-5715 ("Spectre variant 2") mitigations to guest

From: Paolo Bonzini
Date: Tue Jan 09 2018 - 07:05:52 EST


This series allows guests to use the MSR_IA32_SPEC_CTRL and
MSR_IA32_PRED_CMD model specific registers that were added as mitigations
for CVE-2017-5715.

These are only the KVM specific parts of the fix. It does *not* yet
include any protection for reading host memory from the guest, because
that would be done in the same way as the rest of Linux. So there is no
IBRS *usage* here, no retpolines, no stuffing of the return stack buffer.
(KVM already includes a fix to clear all registers on vmexit, which is
enough to block Google Project Zero's PoC exploit).

However, I am including the changes to use IBPB (indirect branch
predictor barrier) if available. That occurs only when there is a VCPU
switch on a physical CPU, thus it has a small impact on performance.

The patches are a bit hackish because the relevant cpufeatures have
not been included yet, and because I wanted to make the patches easier
to backport to distro kernels if desired, but I would still like to
have them in 4.16.

Please review. The interdiff from v1 is at the end of this cover letter.

Thanks,

Paolo

Paolo Bonzini (6):
KVM: x86: add SPEC_CTRL and IBPB_SUPPORT accessors
x86/msr: add definitions for indirect branch predictor MSRs
kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
KVM: SVM: fix comment
kvm: svm: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to guest
KVM: x86: add SPEC_CTRL and IBPB_SUPPORT to MSR and CPUID lists

Tim Chen (1):
kvm: vmx: Set IBPB when running a different VCPU

Tom Lendacky (1):
x86/svm: Set IBPB when running a different VCPU

arch/x86/include/asm/msr-index.h | 9 ++++-
arch/x86/kvm/cpuid.c | 27 ++++++++++---
arch/x86/kvm/cpuid.h | 22 +++++++++++
arch/x86/kvm/svm.c | 83 +++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
6 files changed, 193 insertions(+), 8 deletions(-)


diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec08f1d8d39b..828a03425571 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,11 +39,6 @@

/* Intel MSRs. Some also available on other CPUs */

-#define MSR_IA32_SPEC_CTRL 0x00000048
-
-#define MSR_IA32_PRED_CMD 0x00000049
-#define FEATURE_SET_IBPB (1UL << 0)
-
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f

@@ -469,8 +464,15 @@
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+
+#define MSR_IA32_SPEC_CTRL 0x00000048
+#define SPEC_CTRL_FEATURE_DISABLE_IBRS (0 << 0)
+#define SPEC_CTRL_FEATURE_ENABLE_IBRS (1 << 0)
+
+#define MSR_IA32_PRED_CMD 0x00000049
+#define PRED_CMD_IBPB (1UL << 0)
+
#define MSR_IA32_BNDCFGS 0x00000d90
-
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc

#define MSR_IA32_XSS 0x00000da0
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dd744d896cec..c249d5f599e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1738,7 +1738,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
* svm_vcpu_load; block speculative execution.
*/
if (have_ibpb_support)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1776,7 +1776,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
if (have_ibpb_support)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}

avic_vcpu_load(vcpu, cpu);
@@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->nested.vm_cr_msr;
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
msr_info->data = svm->spec_ctrl;
break;
case MSR_IA32_UCODE_REV:
@@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
svm->spec_ctrl = data;
break;
case MSR_IA32_APICBASE:
@@ -4996,6 +5004,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)

local_irq_enable();

+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
if (have_spec_ctrl && svm->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

@@ -5077,6 +5089,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (svm->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
}
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");

#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf127c570675..ef2681fa568a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2376,7 +2376,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
if (have_spec_ctrl)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}

if (!already_loaded) {
@@ -3347,6 +3347,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;

switch (msr_info->index) {
@@ -3358,8 +3359,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ vmx_load_host_state(vmx);
+ msr_info->data = vmx->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
@@ -3368,7 +3369,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = guest_read_tsc(vcpu);
break;
case MSR_IA32_SPEC_CTRL:
- msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
@@ -3388,13 +3393,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
+ !(vmx->msr_ia32_feature_control &
FEATURE_CONTROL_LMCE))
return 1;
msr_info->data = vcpu->arch.mcg_ext_ctl;
break;
case MSR_IA32_FEATURE_CONTROL:
- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+ msr_info->data = vmx->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -3443,7 +3448,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+ msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -3510,7 +3515,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_write_tsc(vcpu, msr_info);
break;
case MSR_IA32_SPEC_CTRL:
- to_vmx(vcpu)->spec_ctrl = msr_info->data;
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
+ to_vmx(vcpu)->spec_ctrl = data;
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -4046,7 +4046,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
* vmx_vcpu_load; block speculative execution.
*/
if (have_spec_ctrl)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}

static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
@@ -9629,13 +9638,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)

pt_guest_enter(vmx);

- if (have_spec_ctrl && vmx->spec_ctrl != 0)
- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-
atomic_switch_perf_msrs(vmx);

vmx_arm_hv_timer(vcpu);

+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
+ if (have_spec_ctrl && vmx->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -9744,9 +9757,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)

if (have_spec_ctrl) {
rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
- if (vmx->spec_ctrl)
+ if (vmx->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
}
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");

/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
--
1.8.3.1