[PATCH 3/3] x86: get rid of FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK

From: Denys Vlasenko
Date: Wed Mar 18 2015 - 15:48:29 EST


FIXUP_TOP_OF_STACK is only necessary because we don't save %r11
to pt_regs->r11 on the SYSCALL64 fast path, but we want ptrace to
see it populated.

Bite the bullet, add a single additional PUSH insn, and remove
FIXUP_TOP_OF_STACK.

RESTORE_TOP_OF_STACK is already a nop. Remove it too.

On a SandyBridge CPU, it does not get slower:
measured 54.22 ns per getpid syscall both before and after the last
two changes, on a defconfig kernel.

Signed-off-by: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
CC: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Borislav Petkov <bp@xxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
CC: Oleg Nesterov <oleg@xxxxxxxxxx>
CC: Frederic Weisbecker <fweisbec@xxxxxxxxx>
CC: Alexei Starovoitov <ast@xxxxxxxxxxxx>
CC: Will Drewry <wad@xxxxxxxxxxxx>
CC: Kees Cook <keescook@xxxxxxxxxxxx>
CC: x86@xxxxxxxxxx
CC: linux-kernel@xxxxxxxxxxxxxxx
---
arch/x86/kernel/entry_64.S | 35 ++---------------------------------
1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4647c1d..a0a3a6e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -22,8 +22,6 @@
* - CFI macros are used to generate dwarf2 unwind information for better
* backtraces. They don't change any code.
* - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
* - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
* - idtentry - Define exception entry points.
*/
@@ -119,23 +117,6 @@ ENDPROC(native_usergs_sysret64)
#endif

/*
- * C code is not supposed to know that the iret frame is not populated.
- * Every time a C function with an pt_regs argument is called from
- * the SYSCALL based fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
- .macro FIXUP_TOP_OF_STACK tmp offset=0
- /* copy flags to r11 as sysret would do */
- movq EFLAGS+\offset(%rsp),\tmp
- movq \tmp,R11+\offset(%rsp)
- .endm
-
- .macro RESTORE_TOP_OF_STACK tmp offset=0
- /* nothing to do */
- .endm
-
-/*
* empty frame
*/
.macro EMPTY_FRAME start=1 offset=0
@@ -259,7 +240,8 @@ GLOBAL(system_call_after_swapgs)
pushq_cfi_reg r8 /* pt_regs->r8 */
pushq_cfi_reg r9 /* pt_regs->r9 */
pushq_cfi_reg r10 /* pt_regs->r10 */
- sub $(7*8),%rsp /* pt_regs->r11,bp,bx,r12-15 not saved */
+ pushq_cfi_reg r11 /* pt_regs->r11 */
+ sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */

testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
jnz tracesys
@@ -306,7 +288,6 @@ ret_from_sys_call:
CFI_RESTORE_STATE

int_ret_from_sys_call_fixup:
- FIXUP_TOP_OF_STACK %r11
jmp int_ret_from_sys_call

/* Do syscall entry tracing */
@@ -322,7 +303,6 @@ tracesys:

tracesys_phase2:
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %rdi
movq %rsp, %rdi
movq $AUDIT_ARCH_X86_64, %rsi
movq %rax,%rdx
@@ -415,9 +395,7 @@ ENTRY(stub_\func)
CFI_STARTPROC
DEFAULT_FRAME 0, 8 /* offset 8: return address */
SAVE_EXTRA_REGS 8
- FIXUP_TOP_OF_STACK %r11, 8
call sys_\func
- RESTORE_TOP_OF_STACK %r11, 8
ret
CFI_ENDPROC
END(stub_\func)
@@ -432,7 +410,6 @@ ENTRY(stub_execve)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call sys_execve
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
@@ -445,9 +422,7 @@ ENTRY(stub_execveat)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call sys_execveat
- RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
@@ -463,7 +438,6 @@ ENTRY(stub_rt_sigreturn)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_EXTRA_REGS
@@ -477,7 +451,6 @@ ENTRY(stub_x32_rt_sigreturn)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call sys32_x32_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_EXTRA_REGS
@@ -490,9 +463,7 @@ ENTRY(stub_x32_execve)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call compat_sys_execve
- RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
@@ -504,9 +475,7 @@ ENTRY(stub_x32_execveat)
addq $8, %rsp
DEFAULT_FRAME 0
SAVE_EXTRA_REGS
- FIXUP_TOP_OF_STACK %r11
call compat_sys_execveat
- RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
--
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/