[PATCH 3/3] x86: Clean up do_fast_syscall_32() tests

From: Brian Gerst
Date: Sat Jul 18 2020 - 10:38:53 EST


Reorganize the tests for SYSEXITS/SYSRETL, cleaning up comments and merging
native and compat code.

Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
---
arch/x86/entry/common.c | 85 ++++++++++++++------------------
arch/x86/entry/entry_32.S | 6 +--
arch/x86/entry/entry_64_compat.S | 13 ++---
arch/x86/include/asm/segment.h | 1 +
arch/x86/include/asm/syscall.h | 2 +-
5 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdb4c15b8610e..df1497fa554b8 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -500,10 +500,24 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
exit_to_user_mode();
}

+/* Returns true to return using SYSEXIT/SYSRETL, or false to return using IRET */
static bool __do_fast_syscall_32(struct pt_regs *regs)
{
+ /*
+ * Called using the internal vDSO SYSENTER/SYSCALL32 calling
+ * convention. Adjust regs so it looks like we entered using int80.
+ */
+ unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
+ vdso_image_32.sym_int80_landing_pad;
int res;

+ /*
+ * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
+ * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
+ * Fix it up.
+ */
+ regs->ip = landing_pad;
+
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
@@ -522,34 +536,39 @@ static bool __do_fast_syscall_32(struct pt_regs *regs)
regs->ax = -EFAULT;
local_irq_disable();
__prepare_exit_to_usermode(regs);
+ /* Keep it simple: use IRET. */
return false;
}

/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs);
+
+ /* XEN PV guests always use IRET path */
+ if (static_cpu_has(X86_FEATURE_XENPV))
+ return false;
+
+ /* CS and SS must match values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS))
+ return false;
+
+ /* EIP must point to the VDSO landing pad */
+ if (unlikely(regs->ip != landing_pad))
+ return false;
+
+ /* The TF, RF, and VM flags must be restored with IRET */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)))
+ return false;
+
+ /* Return with SYSEXIT/SYSRETL */
return true;
}

-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
+__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs)
{
- /*
- * Called using the internal vDSO SYSENTER/SYSCALL32 calling
- * convention. Adjust regs so it looks like we entered using int80.
- */
- unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
- vdso_image_32.sym_int80_landing_pad;
bool success;

check_user_regs(regs);

- /*
- * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
- * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
- * Fix it up.
- */
- regs->ip = landing_pad;
-
enter_from_user_mode();
instrumentation_begin();

@@ -559,42 +578,10 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
instrumentation_end();
exit_to_user_mode();

- /* If it failed, keep it simple: use IRET. */
- if (!success)
- return 0;
-
-#ifdef CONFIG_X86_64
- /*
- * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
- * SYSRETL is available on all 64-bit CPUs, so we don't need to
- * bother with SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
- */
- return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
-#else
- /*
- * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
- *
- * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
- * because the ECX fixup above will ensure that this is essentially
- * never the case.
- *
- * We don't allow syscalls at all from VM86 mode, but we still
- * need to check VM, because we might be returning from sys_vm86.
- */
- return regs->cs == __USER_CS && regs->ss == __USER_DS &&
- regs->ip == landing_pad &&
- (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
-#endif
+ return success;
}

-/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
-__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
+__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs)
{
/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
regs->sp = regs->bp;
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2d0bd5d5f0328..24979fc747df0 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -965,9 +965,9 @@ SYM_FUNC_START(entry_SYSENTER_32)

movl %esp, %eax
call do_SYSENTER_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
- "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+
+ testb %al, %al
+ jz .Lsyscall_32_done

STACKLEAK_ERASE

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 541fdaf640453..d8ac70ebb5f9e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -137,9 +137,10 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)

movq %rsp, %rdi
call do_SYSENTER_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+
+ testb %al, %al
+ jz swapgs_restore_regs_and_return_to_usermode
+
jmp sysret32_from_system_call

.Lsysenter_fix_flags:
@@ -252,9 +253,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)

movq %rsp, %rdi
call do_fast_syscall_32
- /* XEN PV guests always use IRET path */
- ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+
+ testb %al, %al
+ jz swapgs_restore_regs_and_return_to_usermode

/* Opportunistic SYSRET */
sysret32_from_system_call:
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 6669164abadcb..da29d2cfeee83 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -135,6 +135,7 @@
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __USER32_CS __USER_CS
#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)

/* segment for calling fn: */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 766f9b9736185..e3e2f255bdb67 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -162,7 +162,7 @@ static inline int syscall_get_arch(struct task_struct *task)

bool do_syscall_64(unsigned long nr, struct pt_regs *regs);
void do_int80_syscall_32(struct pt_regs *regs);
-long do_fast_syscall_32(struct pt_regs *regs);
+bool do_fast_syscall_32(struct pt_regs *regs);

#endif /* CONFIG_X86_32 */

--
2.26.2