[PATCH 4.18 37/79] x86/KVM/VMX: Add L1D flush algorithm

From: Greg Kroah-Hartman
Date: Tue Aug 14 2018 - 13:27:48 EST


4.18-stable review patch. If anyone has any objections, please let me know.

------------------

From: Paolo Bonzini <pbonzini@xxxxxxxxxx>

To mitigate the L1 Terminal Fault vulnerability it's required to flush L1D
on VMENTER to prevent rogue guests from snooping host memory.

CPUs will have a new control MSR via a microcode update to flush L1D with a
single MSR write, but in the absence of microcode a fallback to a software
based flush algorithm is required.

Add a software flush loop which is based on code from Intel.

[ tglx: Split out from combo patch ]
[ bpetkov: Polish the asm code ]

Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
arch/x86/kvm/vmx.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 66 insertions(+), 5 deletions(-)

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9599,6 +9599,46 @@ static int vmx_handle_exit(struct kvm_vc
}
}

+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
+ * flush it is required to read in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32KiB L1D cache size
+ * there is no point in doing so.
+ */
+#define L1D_CACHE_ORDER 4
+static void *vmx_l1d_flush_pages;
+
+static void __maybe_unused vmx_l1d_flush(void)
+{
+ int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+ asm volatile(
+ /* First ensure the pages are in the TLB */
+ "xorl %%eax, %%eax\n"
+ ".Lpopulate_tlb:\n\t"
+ "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $4096, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lpopulate_tlb\n\t"
+ "xorl %%eax, %%eax\n\t"
+ "cpuid\n\t"
+ /* Now fill the cache */
+ "xorl %%eax, %%eax\n"
+ ".Lfill_cache:\n"
+ "movzbl (%[empty_zp], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $64, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lfill_cache\n\t"
+ "lfence\n"
+ :: [empty_zp] "r" (vmx_l1d_flush_pages),
+ [size] "r" (size)
+ : "eax", "ebx", "ecx", "edx");
+}
+
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -13198,13 +13238,29 @@ static struct kvm_x86_ops vmx_x86_ops __
.enable_smi_window = enable_smi_window,
};

-static void __init vmx_setup_l1d_flush(void)
+static int __init vmx_setup_l1d_flush(void)
{
+ struct page *page;
+
if (vmentry_l1d_flush == VMENTER_L1D_FLUSH_NEVER ||
!boot_cpu_has_bug(X86_BUG_L1TF))
- return;
+ return 0;
+
+ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+ if (!page)
+ return -ENOMEM;

+ vmx_l1d_flush_pages = page_address(page);
static_branch_enable(&vmx_l1d_should_flush);
+ return 0;
+}
+
+static void vmx_free_l1d_flush_pages(void)
+{
+ if (vmx_l1d_flush_pages) {
+ free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+ vmx_l1d_flush_pages = NULL;
+ }
}

static int __init vmx_init(void)
@@ -13240,12 +13296,16 @@ static int __init vmx_init(void)
}
#endif

- vmx_setup_l1d_flush();
+ r = vmx_setup_l1d_flush();
+ if (r)
+ return r;

r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r) {
+ vmx_free_l1d_flush_pages();
return r;
+ }

#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -13287,6 +13347,7 @@ static void __exit vmx_exit(void)
static_branch_disable(&enable_evmcs);
}
#endif
+ vmx_free_l1d_flush_pages();
}

module_init(vmx_init)