[RFC patch 05/15] entry: Provide generic syscall exit function

From: Thomas Gleixner
Date: Thu Sep 19 2019 - 11:10:53 EST


Similar to syscall entry all architectures have similar and pointlessly
different code to handle pending work before returning from a syscall to
user space.

Provide a generic version.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
include/linux/entry-common.h | 31 ++++++++++++++++++++++++
kernel/entry/common.c | 55 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+)

--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -45,6 +45,17 @@
_TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
ARCH_SYSCALL_ENTER_WORK)

+/*
+ * TIF flags handled in syscall_exit_to_usermode()
+ */
+#ifndef ARCH_SYSCALL_EXIT_WORK
+# define ARCH_SYSCALL_EXIT_WORK (0)
+#endif
+
+#define SYSCALL_EXIT_WORK \
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
+
/**
* arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
*
@@ -118,4 +129,24 @@ static inline long syscall_enter_from_us
return syscall;
}

+/**
+ * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
+ *
+ * Defaults to tracehook_report_syscall_exit(). Can be replaced by
+ * architecture specific code.
+ *
+ * Invoked from syscall_exit_to_usermode()
+ */
+static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step);
+
+#ifndef arch_syscall_exit_tracehook
+static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
+{
+ tracehook_report_syscall_exit(regs, step);
+}
+#endif
+
+/* Common syscall exit function */
+void syscall_exit_to_usermode(struct pt_regs *regs, long syscall, long retval);
+
#endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -31,3 +31,58 @@ long core_syscall_enter_from_usermode(st

return ret ? : syscall;
}
+
+#ifndef _TIF_SINGLESTEP
+static inline bool report_single_step(unsigned long ti_work)
+{
+ return false;
+}
+#else
+/*
+ * If TIF_SYSCALL_EMU is set, then the only reason to report is when
+ * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * instruction has been already reported in syscall_enter_from_usermode().
+ */
+#define SYSEMU_STEP (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)
+
+static inline bool report_single_step(unsigned long ti_work)
+{
+ return (ti_work & SYSEMU_STEP) == _TIF_SINGLESTEP;
+}
+#endif
+
+static void syscall_exit_work(struct pt_regs *regs, long retval,
+ unsigned long ti_work)
+{
+ bool step;
+
+ audit_syscall_exit(regs);
+
+ if (ti_work & _TIF_SYSCALL_TRACEPOINT)
+ trace_sys_exit(regs, retval);
+
+ step = report_single_step(ti_work);
+ if (step || ti_work & _TIF_SYSCALL_TRACE)
+ arch_syscall_exit_tracehook(regs, step);
+}
+
+void syscall_exit_to_usermode(struct pt_regs *regs, long syscall, long retval)
+{
+ unsigned long ti_work;
+
+ CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
+ WARN(irqs_disabled(), "syscall %ld left IRQs disabled", syscall))
+ local_irq_enable();
+
+ rseq_syscall(regs);
+
+ /*
+ * Handle work which needs to run exactly once per syscall exit
+ * with interrupts enabled.
+ */
+ ti_work = READ_ONCE(current_thread_info()->flags);
+ if (unlikely(ti_work & SYSCALL_EXIT_WORK))
+ syscall_exit_work(regs, retval, ti_work);
+}