[tip:x86/pti] x86/retpoline: Avoid return buffer underflows on context switch

From: tip-bot for Andi Kleen
Date: Tue Jan 09 2018 - 11:05:26 EST


Commit-ID: 450c505047981e97471f0170e0102f613bba4739
Gitweb: https://git.kernel.org/tip/450c505047981e97471f0170e0102f613bba4739
Author: Andi Kleen <ak@xxxxxxxxxxxxxxx>
AuthorDate: Tue, 9 Jan 2018 14:43:17 +0000
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Tue, 9 Jan 2018 16:17:55 +0100

x86/retpoline: Avoid return buffer underflows on context switch

CPUs have return buffers which store return addresses so that RET can
predict function returns. Some CPUs (Skylake, some Broadwells) can fall
back to indirect branch prediction when the return buffer underflows.

Retpoline is supposed to prevent uncontrolled indirect branch speculation,
whose predictions can be poisoned from ring 3, so it also needs to prevent
uncontrolled return buffer underflows in the kernel.

Such an underflow can happen when a context switch moves from a task with
a shallower kernel call stack to one with a deeper call stack. Returning
back up the deeper call stack would eventually underflow the return buffer,
which again would make the CPU fall back to the indirect branch predictor.

To guard against this, fill the return buffer with controlled content during
context switch. This prevents any underflows.

Always fill the buffer with 30 entries: 32, the maximum return buffer size,
minus 2 to account for at least one call from entry_{64,32}.S into C code
and another into the function doing the fill.

That's pessimistic because there are likely more controlled kernel calls
before an underflow could occur, but that depends on compiler optimizations
and other factors, so avoid speculative optimization, err on the side of
safety and always fill 30 entries.

[dwmw2: Fix comments about nop between calls, move #ifdef CONFIG_RETPOLINE
to the call sites, not the macro. Use Google's original RSB stuffing.]

[tglx: Massaged changelog]

Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: gnomes@xxxxxxxxxxxxxxxxxxx
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Jiri Kosina <jikos@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxx>
Cc: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxxx>
Cc: Paul Turner <pjt@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/1515508997-6154-12-git-send-email-dwmw@xxxxxxxxxxxx

---
arch/x86/entry/entry_32.S | 17 ++++++++++++++
arch/x86/entry/entry_64.S | 17 ++++++++++++++
arch/x86/include/asm/nospec-branch.h | 44 ++++++++++++++++++++++++++++++++++++
3 files changed, 78 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..d2ef7f32 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,23 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

+#ifdef CONFIG_RETPOLINE
+ /*
+ * When we switch from a shallower to a deeper call stack
+ * the return buffer will underflow in the kernel in the next task.
+ * This could cause the CPU to fall back to indirect branch
+ * prediction, which may be poisoned.
+ *
+ * To guard against that always fill the return stack with
+ * known values.
+ *
+ * We do this in assembler because it needs to be before
+ * any calls on the new stack, and this can be difficult to
+ * ensure in a complex C function like __switch_to.
+ */
+ ALTERNATIVE "", "FILL_RETURN_BUFFER %ebx", X86_FEATURE_RETPOLINE
+#endif
+
/* restore callee-saved registers */
popl %esi
popl %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..58dbf7a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,23 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif

+#ifdef CONFIG_RETPOLINE
+ /*
+ * When we switch from a shallower to a deeper call stack
+ * the return buffer will underflow in the kernel in the next task.
+ * This could cause the CPU to fall back to indirect branch
+ * prediction, which may be poisoned.
+ *
+ * To guard against that always fill the return stack with
+ * known values.
+ *
+ * We do this in assembler because it needs to be before
+ * any calls on the new stack, and this can be difficult to
+ * ensure in a complex C function like __switch_to.
+ */
+ ALTERNATIVE "", "FILL_RETURN_BUFFER %r12", X86_FEATURE_RETPOLINE
+#endif
+
/* restore callee-saved registers */
popq %r15
popq %r14
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index a86e845..8ddf851 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -64,6 +64,50 @@
#endif
.endm

+/*
+ * Use 32-N: 32 is the max return buffer size, but there should have been
+ * at a minimum two controlled calls already: one into the kernel from
+ * entry*.S and another into the function containing this macro. So N=2,
+ * thus 30.
+ */
+#define NUM_BRANCHES_TO_FILL 30
+
+/*
+ * Fill the CPU return stack buffer to prevent indirect branch prediction
+ * on underflow.
+ *
+ * The 'pause; jmp' loop after each call is a speculation trap: if the
+ * CPU mispredicts a return using one of the stuffed entries, speculative
+ * execution is captured in the loop instead of in attacker-controlled code.
+ *
+ * Required in various cases for retpoline and IBRS-based mitigations
+ * for the Spectre variant 2 vulnerability.
+ */
+.macro FILL_RETURN_BUFFER reg:req
+ mov $NUM_BRANCHES_TO_FILL/2, \reg
+ .align 16
+.Ldo_call1_\@:
+ call .Ldo_call2_\@
+.Ltrap1_\@:
+ pause
+ jmp .Ltrap1_\@
+ .align 16
+.Ldo_call2_\@:
+ call .Ldo_loop_\@
+.Ltrap2_\@:
+ pause
+ jmp .Ltrap2_\@
+ .align 16
+.Ldo_loop_\@:
+ dec \reg
+ jnz .Ldo_call1_\@
+#ifdef CONFIG_64BIT
+ addq $8*NUM_BRANCHES_TO_FILL, %rsp
+#else
+ addl $4*NUM_BRANCHES_TO_FILL, %esp
+#endif
+.endm
+
#else /* __ASSEMBLY__ */

#if defined(CONFIG_X86_64) && defined(RETPOLINE)