Re: [PATCH 3/4] x86/retpoline: Simplify vmexit_fill_RSB()
From: Borislav Petkov
Date: Fri Jan 26 2018 - 08:24:49 EST
On Fri, Jan 26, 2018 at 12:33:31PM +0000, David Woodhouse wrote:
> On Fri, 2018-01-26 at 13:11 +0100, Borislav Petkov wrote:
> >
> > +ENTRY(__fill_rsb_clobber_ax)
+	___FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, %_ASM_SP
> > +END(__fill_rsb_clobber_ax)
> > +EXPORT_SYMBOL_GPL(__fill_rsb_clobber_ax)
>
> You still have clear vs. fill confusion there.
I just took what was there originally:
- __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
                                       ^^^^^^^^^^^^^^^
>
> How about making it take the loop count in %eax? That would allow us to
> drop the ___FILL_RETURN_BUFFER macro entirely.
>
> Or does that make us depend on your other fixes to accept jumps in
> places other than the first instruction of altinstr?
>
> Even if you give us separate __clear_rsb_clobber_ax vs.
> __fill_rsb_clobber_ax functions, we could still kill the macro in
> nospec-branch.h and use a .macro in retpoline.S for the actual
> implementation, couldn't we?
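Just for the record, a helper which takes the count in %eax would boil down
to something like this (completely untested sketch, function name made up):

/*
 * Loop count comes in %eax and gets clobbered; assumes an even count.
 * The two bogus return addresses are popped off the stack at the end of
 * each iteration so the count doesn't have to be an assembly-time
 * constant.
 */
ENTRY(__stuff_rsb_ax)
	shr	$1, %eax		/* two calls per iteration */
	.align 16
771:
	call	772f
773:	/* speculation trap */
	pause
	lfence
	jmp	773b
	.align 16
772:
	call	774f
775:	/* speculation trap */
	pause
	lfence
	jmp	775b
	.align 16
774:
	add	$(2 * BITS_PER_LONG / 8), %_ASM_SP
	dec	%eax
	jnz	771b
	ret
END(__stuff_rsb_ax)

But then every call site needs a MOV to set up %eax first, so the CALL
wouldn't be the first instruction of the altinstr anymore - i.e., exactly
the dependency on the alternatives fixes you mention.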
All good ideas. So how about the below diff on top?
It builds and boots in a VM here. I need to go to the store but will
play with it more when I get back.
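
For context, the C side of vmexit_fill_RSB() then remains just a call
through alternatives - roughly like this, untested:

static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
	/*
	 * Sketch only: keep declaring the AX clobber even though the
	 * helper saves/restores %_ASM_AX around the stuffing loop now.
	 */
	alternative_input("",
			  "call __fill_rsb_clobber_ax",
			  X86_FEATURE_RETPOLINE,
			  ASM_NO_INPUT_CLOBBER(_ASM_AX));
#endif
}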
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 60c4c342316c..f7823a5a8714 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,7 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ff6f8022612c..7a190ff524e2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -499,7 +499,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b9125..b889705f995a 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,4 +38,7 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb_clobber_ax(void);
+asmlinkage void __clr_rsb_clobber_ax(void);
+
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 61d4d7033758..3049433687c8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -27,34 +27,8 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-/*
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- call 772f; \
-773: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- call 774f; \
-775: /* speculation trap */ \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
-
#ifdef __ASSEMBLY__
-#include <asm/bitsperlong.h>
-
/*
* This should be used immediately before a retpoline alternative. It tells
* objtool where the retpolines are so that it can make sense of the control
@@ -123,40 +97,9 @@
#endif
.endm
-/* Same as above but with alignment additionally */
-.macro ___FILL_RETURN_BUFFER reg:req nr:req sp:req
- mov (\nr / 2), \reg
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec \reg
- jnz 771b
- add (BITS_PER_LONG/8) * \nr, \sp
-.endm
-
- /*
- * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
- * monstrosity above, manually.
- */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
- \ftr
+ ALTERNATIVE "jmp .Lskip_rsb_\@", "call __clr_rsb_clobber_ax", \ftr
.Lskip_rsb_\@:
#endif
.endm
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 491f6e0be66e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -971,9 +971,4 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
-
-#ifdef CONFIG_RETPOLINE
-asmlinkage void __fill_rsb_clobber_ax(void);
-#endif
-
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 297b0fd2ad10..522d92bd3176 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,7 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -18,6 +19,32 @@ ENTRY(__x86_indirect_thunk_\reg)
ENDPROC(__x86_indirect_thunk_\reg)
.endm
+.macro BOINK_RSB nr:req sp:req
+ push %_ASM_AX
+ mov $(\nr / 2), %_ASM_AX
+ .align 16
+771:
+ call 772f
+773: /* speculation trap */
+ pause
+ lfence
+ jmp 773b
+ .align 16
+772:
+ call 774f
+775: /* speculation trap */
+ pause
+ lfence
+ jmp 775b
+ .align 16
+774:
+ dec %_ASM_AX
+ jnz 771b
+ add $((BITS_PER_LONG/8) * \nr), \sp
+ pop %_ASM_AX
+.endm
+
+
/*
* Despite being an assembler file we can't just use .irp here
* because __KSYM_DEPS__ only uses the C preprocessor and would
@@ -48,6 +75,13 @@ GENERATE_THUNK(r15)
#endif
ENTRY(__fill_rsb_clobber_ax)
- ___FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, %_ASM_SP
+ BOINK_RSB RSB_FILL_LOOPS, %_ASM_SP
+ ret
END(__fill_rsb_clobber_ax)
EXPORT_SYMBOL_GPL(__fill_rsb_clobber_ax)
+
+ENTRY(__clr_rsb_clobber_ax)
+ BOINK_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+ ret
+END(__clr_rsb_clobber_ax)
+EXPORT_SYMBOL_GPL(__clr_rsb_clobber_ax)
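
Note that the nr argument of FILL_RETURN_BUFFER is unused now that the loop
count is baked into __clr_rsb_clobber_ax, so after dropping nr:req from the
macro the two call sites could shrink further to just

	FILL_RETURN_BUFFER X86_FEATURE_RSB_CTXSW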
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.