[PATCH 3/3] x86/asm/entry/32: Remove most of SYSCALL32 code, part 2

From: Denys Vlasenko
Date: Fri Jul 24 2015 - 09:48:28 EST


SYSCALL32 code is nearly identical to SYSCALL32, except for initial
section. Merge them.

This change is split into two parts, to make review eaiser.
This is part 2, which tidies up loose ends:

"sysenter_fix_flags" detour does not need to be convoluted anymore,
straigten it up.

auditsys_entry_common and auditsys_exit macros have only one caller now.
Drop masros, move their bodies to invocation locations.

Reinstate "why we use SYSRETL instead of SYSEXIT" comment.

Run-tested under QEMU: calls through SYSENTER VDSO still work:

/ # ./test_syscall_vdso32
[RUN] Executing 6-argument 32-bit syscall via VDSO
[OK] Arguments are preserved across syscall
[NOTE] R11 has changed:0000000000200ed7 - assuming clobbered by SYSRET insn
[OK] R8..R15 did not leak kernel data
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data
[RUN] Running tests under ptrace
[RUN] Executing 6-argument 32-bit syscall via VDSO
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data
[RUN] Executing 6-argument 32-bit syscall via INT 80
[OK] Arguments are preserved across syscall
[OK] R8..R15 did not leak kernel data

Signed-off-by: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
CC: Krzysztof A. Sobiecki <sobkas@xxxxxxxxx>
CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
CC: Borislav Petkov <bp@xxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
CC: Oleg Nesterov <oleg@xxxxxxxxxx>
CC: Frederic Weisbecker <fweisbec@xxxxxxxxx>
CC: Alexei Starovoitov <ast@xxxxxxxxxxxx>
CC: Will Drewry <wad@xxxxxxxxxxxx>
CC: Kees Cook <keescook@xxxxxxxxxxxx>
CC: x86@xxxxxxxxxx
CC: linux-kernel@xxxxxxxxxxxxxxx
---
arch/x86/entry/entry_64_compat.S | 140 +++++++++++++++++++--------------------
1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 2d0a2f0..6ee70fd 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -95,74 +95,25 @@ ENTRY(entry_SYSENTER_compat)
* Sysenter doesn't filter flags, so we need to clear NT ourselves.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
- jnz sysenter_fix_flags
-sysenter_flags_fixed:
- jmp sysenter_jumps_here
-
-sysenter_fix_flags:
+ jz sysenter_jumps_here
pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
popfq
- jmp sysenter_flags_fixed
-ENDPROC(entry_SYSENTER_compat)
-
-
- #ifdef CONFIG_AUDITSYSCALL
- .macro auditsys_entry_common
- /*
- * At this point, registers hold syscall args in the 32-bit syscall ABI:
- * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
- *
- * We want to pass them to __audit_syscall_entry(), which is a 64-bit
- * C function with 5 parameters, so shuffle them to match what
- * the function expects: RDI,RSI,RDX,RCX,R8.
- */
- movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
- xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
- /* arg3 (RDX) <= 2nd syscall arg (ECX) */
- movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
- movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
- call __audit_syscall_entry
-
- /*
- * We are going to jump back to the syscall dispatch code.
- * Prepare syscall args as required by the 64-bit C ABI.
- * Registers clobbered by __audit_syscall_entry() are
- * loaded from pt_regs on stack:
- */
- movl ORIG_RAX(%rsp), %eax /* syscall number */
- movl %ebx, %edi /* arg1 */
- movl RCX(%rsp), %esi /* arg2 */
- movl RDX(%rsp), %edx /* arg3 */
- movl RSI(%rsp), %ecx /* arg4 */
- movl RDI(%rsp), %r8d /* arg5 */
- .endm
-
- .macro auditsys_exit exit
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz ia32_ret_from_sys_call
- movl %eax, %esi /* second arg, syscall return value */
- cmpl $-MAX_ERRNO, %eax /* is it an error ? */
- jbe 1f
- movslq %eax, %rsi /* if error sign extend to 64 bits */
- 1: setbe %al /* 1 if error, 0 if not */
- movzbl %al, %edi /* zero-extend that into %edi */
- call __audit_syscall_exit
- movq RAX(%rsp), %rax /* reload syscall return value */
- movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jz \exit
- xorl %eax, %eax /* Do not leak kernel information */
- movq %rax, R11(%rsp)
- movq %rax, R10(%rsp)
- movq %rax, R9(%rsp)
- movq %rax, R8(%rsp)
- jmp int_ret_from_sys_call_irqs_off
- .endm
- #endif
+ jmp sysenter_jumps_here
+ /*
+ * SYSEXIT insn is not obviously safe for 64-bit kernels --
+ * an NMI between STI and SYSEXIT has poorly specified behavior,
+ * and NMI followed by an IRQ with usergs is fatal.
+ * So we just pretend we're using SYSEXIT but we really use
+ * SYSRETL instead. (Yes, SYSRETL works even on Intel CPUs.)
+ * We do that by reusing the entire SYSCALL code path:
+ * the jump above takes us there.
+ *
+ * The difference of SYSENTER 32-bit ABI versus SYSCALL
+ * is that SYSENTER ABI does not promise to preserve EDX and EBP,
+ * SYSCALL does.
+ */
+ENDPROC(entry_SYSENTER_compat)
+
/*
* 32-bit SYSCALL instruction entry.
*
@@ -285,13 +236,62 @@ sysretl_from_sys_call:

#ifdef CONFIG_AUDITSYSCALL
cstar_auditsys:
- movl %r9d, R9(%rsp) /* register to be clobbered by call */
- auditsys_entry_common
- movl R9(%rsp), %r9d /* reload 6th syscall arg */
+ movl %r9d, R9(%rsp) /* R9 is callee-clobbered, save it */
+ /*
+ * At this point, registers hold syscall args in the 32-bit syscall ABI:
+ * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,
+ * 6th arg is in R9.
+ *
+ * We want to pass them to __audit_syscall_entry(), which is a 64-bit
+ * C function with 5 parameters, so shuffle them to match what
+ * the function expects: RDI,RSI,RDX,RCX,R8.
+ */
+ movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */
+ xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */
+ /* arg3 (RDX) <= 2nd syscall arg (ECX) */
+ movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */
+ movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */
+ call __audit_syscall_entry
+
+ /*
+ * We are going to jump back to the syscall dispatch code.
+ * Prepare syscall args as required by the 64-bit C ABI.
+ * Registers clobbered by __audit_syscall_entry() are
+ * loaded from pt_regs on stack:
+ */
+ movl ORIG_RAX(%rsp), %eax /* syscall number */
+ movl %ebx, %edi /* arg1 */
+ movl RCX(%rsp), %esi /* arg2 */
+ movl RDX(%rsp), %edx /* arg3 */
+ movl RSI(%rsp), %ecx /* arg4 */
+ movl RDI(%rsp), %r8d /* arg5 */
+ movl R9(%rsp), %r9d /* arg6 */
jmp cstar_dispatch

sysretl_audit:
- auditsys_exit sysretl_from_sys_call
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz ia32_ret_from_sys_call
+ movl %eax, %esi /* second arg, syscall return value */
+ cmpl $-MAX_ERRNO, %eax /* is it an error ? */
+ jbe 1f
+ movslq %eax, %rsi /* if error sign extend to 64 bits */
+1: setbe %al /* 1 if error, 0 if not */
+ movzbl %al, %edi /* zero-extend that into %edi */
+ call __audit_syscall_exit
+ movq RAX(%rsp), %rax /* reload syscall return value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jz sysretl_from_sys_call
+ xorl %eax, %eax /* Do not leak kernel information */
+ movq %rax, R11(%rsp)
+ movq %rax, R10(%rsp)
+ movq %rax, R9(%rsp)
+ movq %rax, R8(%rsp)
+ jmp int_ret_from_sys_call_irqs_off
#endif

cstar_tracesys:
--
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/