[PATCH v4 01/13] x86/retpoline: Add initial retpoline support

From: David Woodhouse
Date: Thu Jan 04 2018 - 21:01:45 EST


Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
the corresponding thunks. Provide assembler macros for invoking the thunks
in the same way that GCC does, from native and inline assembler.

This adds an X86_BUG_NO_RETPOLINE "feature" so that the thunks can be
patched out at runtime. It is a placeholder for now; the patches which add
support for the new Intel/AMD microcode features will flesh out the precise
conditions under which we disable the retpoline and do other things instead.

[Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
arch/x86/Kconfig | 13 ++++++++
arch/x86/Makefile | 10 +++++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/nospec-branch.h | 58 ++++++++++++++++++++++++++++++++++++
arch/x86/lib/Makefile | 1 +
arch/x86/lib/retpoline.S | 53 ++++++++++++++++++++++++++++++++
6 files changed, 136 insertions(+)
create mode 100644 arch/x86/include/asm/nospec-branch.h
create mode 100644 arch/x86/lib/retpoline.S

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4fc98c..1009d1a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -429,6 +429,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH

+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3e73bc2..8fc45ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -230,6 +230,16 @@ KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables

+# Avoid indirect branches in kernel to deal with Spectre; -DRETPOLINE is added only when RETPOLINE_CFLAGS ends up non-empty
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ else
+ $(warning Retpoline not supported in compiler. System may be insecure.)
+ endif # RETPOLINE_CFLAGS
+endif # CONFIG_RETPOLINE
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 07cdd17..900fa70 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -342,5 +342,6 @@
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
+#define X86_BUG_NO_RETPOLINE X86_BUG(15) /* Placeholder: disable retpoline branch thunks */

#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 0000000..573e9a6
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+#ifdef __ASSEMBLY__
+/*
+ * The asm code uses CONFIG_RETPOLINE; this part will happen even if
+ * the toolchain isn't retpoline-capable.
+ */
+.macro NOSPEC_JMP reg:req /* indirect jmp to the address held in register reg (named without the % prefix) */
+#ifdef CONFIG_RETPOLINE /* jmp via the per-register thunk; the plain jmp is the X86_BUG_NO_RETPOLINE alternative */
+ ALTERNATIVE __stringify(jmp __x86.indirect_thunk.\reg), __stringify(jmp *%\reg), X86_BUG_NO_RETPOLINE
+#else /* !CONFIG_RETPOLINE: plain indirect jmp */
+ jmp *%\reg
+#endif /* CONFIG_RETPOLINE */
+.endm /* NOSPEC_JMP */
+
+.macro NOSPEC_CALL reg:req /* indirect call to the address held in register reg (named without the % prefix) */
+#ifdef CONFIG_RETPOLINE /* call via the per-register thunk; the plain call is the X86_BUG_NO_RETPOLINE alternative */
+ ALTERNATIVE __stringify(call __x86.indirect_thunk.\reg), __stringify(call *%\reg), X86_BUG_NO_RETPOLINE
+#else /* !CONFIG_RETPOLINE: plain indirect call */
+ call *%\reg
+#endif /* CONFIG_RETPOLINE */
+.endm /* NOSPEC_CALL */
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer
+ * toolchains, this is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+#ifdef RETPOLINE /* passed as -DRETPOLINE by arch/x86/Makefile together with RETPOLINE_CFLAGS */
+# ifdef CONFIG_64BIT
+# define NOSPEC_CALL ALTERNATIVE( \
+ "call __x86.indirect_thunk.%V[thunk_target]\n", \
+ "call *%[thunk_target]\n", X86_BUG_NO_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr) /* register operand, so %V can name the per-register thunk */
+# else /* i386 is going to run out of registers if we do that */
+# define NOSPEC_CALL ALTERNATIVE( \
+ " jmp 1221f; " /* skip over the push+jmp stub below */ \
+ "1222: push %[thunk_target];" /* push the target on top of the return address */ \
+ " jmp __x86.indirect_thunk;" /* register-less thunk */ \
+ "1221: call 1222b;\n", /* pushes the return address, then runs the stub */ \
+ "call *%[thunk_target]\n", X86_BUG_NO_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+# endif /* !CONFIG_64BIT */
+#else /* !RETPOLINE: plain indirect call */
+# define NOSPEC_CALL "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif /* RETPOLINE */
+
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b6..f23934b 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o

obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o

diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 0000000..958d56f
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+
+.macro THUNK sp reg /* sp: stack pointer register name; reg: register holding the branch target */
+ .section .text.__x86.indirect_thunk.\reg

+ENTRY(__x86.indirect_thunk.\reg)
+ CFI_STARTPROC
+ ALTERNATIVE "call 2f", __stringify(jmp *%\reg), X86_BUG_NO_RETPOLINE
+1: /* return address pushed by the call to 2; it is overwritten at 2, so the ret never comes back here */
+ lfence
+ jmp 1b
+2:
+ mov %\reg, (%\sp) /* overwrite the pushed return address with the real target */
+ ret /* transfers control to the address that was in reg */
+ CFI_ENDPROC
+ENDPROC(__x86.indirect_thunk.\reg)
+EXPORT_SYMBOL(__x86.indirect_thunk.\reg)
+.endm /* THUNK */
+
+#ifdef CONFIG_64BIT
+.irp reg rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15 /* one thunk per listed register; rsp is passed as the stack pointer */
+ THUNK rsp \reg
+.endr
+#else /* !CONFIG_64BIT */
+.irp reg eax ebx ecx edx esi edi ebp /* one thunk per listed register; esp is passed as the stack pointer */
+ THUNK esp \reg
+.endr
+
+/*
+ * Also provide the original ret-equivalent retpoline for i386 because it's
+ * so register-starved, and we don't care about CET compatibility here.
+ */
+ENTRY(__x86.indirect_thunk)
+ CFI_STARTPROC
+ ALTERNATIVE "call 2f", "ret", X86_BUG_NO_RETPOLINE
+1: /* return address pushed by the call to 2; it is discarded at 2, so the ret never comes back here */
+ lfence
+ jmp 1b
+2:
+ lea 4(%esp), %esp /* drop the return address pushed by the call to 2 */
+ ret /* pops the next stack slot as the target; the inline-asm NOSPEC_CALL pushes it before jumping here */
+ CFI_ENDPROC
+ENDPROC(__x86.indirect_thunk)
+EXPORT_SYMBOL(__x86.indirect_thunk)
+
+#endif
--
2.7.4