Re: [patch 8/8] x86/entry: Move irqflags tracing to do_int80_syscall_32()

From: Alexandre Chartre
Date: Thu Feb 27 2020 - 11:47:20 EST



On 2/25/20 11:08 PM, Thomas Gleixner wrote:
which cleans up the ASM maze.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/x86/entry/common.c | 8 +++++++-
arch/x86/entry/entry_32.S | 9 ++-------
arch/x86/entry/entry_64_compat.S | 14 +++++---------
3 files changed, 14 insertions(+), 17 deletions(-)

--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -333,6 +333,7 @@ void do_syscall_64_irqs_on(unsigned long
{
syscall_entry_fixups();
do_syscall_64_irqs_on(nr, regs);
+ trace_hardirqs_on();
}

This added call looks redundant: trace_hardirqs_on() is already called
through syscall_return_slowpath() (from the previous patch):

do_syscall_64()
-> do_syscall_64_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()

NOKPROBE_SYMBOL(do_syscall_64);
#endif
@@ -389,6 +390,7 @@ static __always_inline void do_syscall_3
{
syscall_entry_fixups();
do_syscall_32_irqs_on(regs);
+ trace_hardirqs_on();
}

Same here:

do_int80_syscall_32()
-> do_syscall_32_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()

NOKPROBE_SYMBOL(do_int80_syscall_32);
@@ -468,8 +470,12 @@ static __always_inline long do_fast_sysc
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible notrace long do_fast_syscall_32(struct pt_regs *regs)
{
+ long ret;
+
syscall_entry_fixups();
- return do_fast_syscall_32_irqs_on(regs);
+ ret = do_fast_syscall_32_irqs_on(regs);
+ trace_hardirqs_on();
+ return ret;
}
NOKPROBE_SYMBOL(do_fast_syscall_32);

Same here:

do_fast_syscall_32()
-> do_fast_syscall_32_irqs_on()
-> do_syscall_32_irqs_on()
-> syscall_return_slowpath()
-> trace_hardirqs_on()

Except for one case: when the get_user()/__get_user() call in
do_fast_syscall_32_irqs_on() fails (i.e. returns non-zero):

do_fast_syscall_32()
-> do_fast_syscall_32_irqs_on()
-> prepare_exit_to_usermode()

So we do need to call trace_hardirqs_on(), but only in that case:

static __always_inline long do_fast_syscall_32_irqs_on(struct pt_regs *regs)
{
...
if (
#ifdef CONFIG_X86_64
/*
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
__get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {

/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
prepare_exit_to_usermode(regs);
trace_hardirqs_on(); <<<=== HERE
return 0; /* Keep it simple: use IRET. */
}
...
}

alex.


--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -811,8 +811,7 @@ SYM_CODE_START(ret_from_fork)
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
- STACKLEAK_ERASE
- jmp restore_all_switch_stack
+ jmp .Lsyscall_32_done
/* kernel thread */
1: movl %edi, %eax
@@ -968,8 +967,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
STACKLEAK_ERASE
-/* Opportunistic SYSEXIT */
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ /* Opportunistic SYSEXIT */
/*
* Setup entry stack - we keep the pointer in %eax and do the
@@ -1072,11 +1070,8 @@ SYM_FUNC_START(entry_INT80_32)
movl %esp, %eax
call do_int80_syscall_32
.Lsyscall_32_done:
-
STACKLEAK_ERASE
-restore_all:
- TRACE_IRQS_IRET
restore_all_switch_stack:
SWITCH_TO_ENTRY_STACK
CHECK_AND_APPLY_ESPFIX
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -132,8 +132,8 @@ SYM_FUNC_START(entry_SYSENTER_compat)
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
jmp sysret32_from_system_call
.Lsysenter_fix_flags:
@@ -244,8 +244,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
@@ -254,7 +254,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_aft
* stack. So let's erase the thread stack right now.
*/
STACKLEAK_ERASE
- TRACE_IRQS_ON /* User mode traces as IRQs on. */
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -393,9 +393,5 @@ SYM_CODE_START(entry_INT80_compat)
movq %rsp, %rdi
call do_int80_syscall_32
-.Lsyscall_32_done:
-
- /* Go back to user mode. */
- TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(entry_INT80_compat)