[PATCH v2 6/8] x86/kvm: Add IBPB support

From: David Woodhouse
Date: Sun Jan 21 2018 - 04:50:05 EST


From: Ashok Raj <ashok.raj@xxxxxxxxx>

Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
barriers on switching between VMs to avoid inter VM specte-v2 attacks.

[peterz: rebase and changelog rewrite]
[karahmed: - vmx: expose PRED_CMD whenever it is available
- svm: only pass through IBPB if it is available]
[dwmw2: - vmx: allow X86_FEATURE_AMD_PRED_CMD too]
Cc: Asit Mallick <asit.k.mallick@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Arjan Van De Ven <arjan.van.de.ven@xxxxxxxxx>
Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@xxxxxxxxx

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: KarimAllah Ahmed <karahmed@xxxxxxxxx>
---
arch/x86/kvm/svm.c | 14 ++++++++++++++
arch/x86/kvm/vmx.c | 11 +++++++++++
2 files changed, 25 insertions(+)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2744b973..cfdb9ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -529,6 +529,7 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;

struct page *save_area;
+ struct vmcb *current_vmcb;
};

static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)

set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
}
+
+ if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+ set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
}

static void add_msr_offset(u32 offset)
@@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So do a full IBPB now.
+ */
+ indirect_branch_prediction_barrier();
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;

if (unlikely(cpu != vcpu->cpu)) {
@@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);

+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ indirect_branch_prediction_barrier();
+ }
avic_vcpu_load(vcpu, cpu);
}

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d1e25db..1e45bb3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ indirect_branch_prediction_barrier();
}

if (!already_loaded) {
@@ -6791,6 +6792,16 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}

+ /*
+ * The AMD_PRED_CMD bit might be exposed by hypervisors on Intel
+ * chips which only want to expose PRED_CMD to guests and not
+ * SPEC_CTRL. Because PRED_CMD is one-shot write-only, while
+ * PRED_CMD requires storage, live migration support, etc.
+ */
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+ vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
--
2.7.4