[PATCH 1/5] kernel: add a new helper to execute system calls from kernel code

From: Andrei Vagin
Date: Fri Jul 22 2022 - 19:02:59 EST


This helper will be used to implement a kvm hypercall to call host
system calls.

The new helper executes seccomp rules and calls the trace_sys_{enter,exit}
hooks. It intentionally does not call the ptrace hooks, because the system
calls it executes are not tied to the current process state.

Signed-off-by: Andrei Vagin <avagin@xxxxxxxxxx>
---
arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++++++
arch/x86/include/asm/syscall.h | 1 +
2 files changed, 51 insertions(+)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..7f4c172a9a4e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -19,6 +19,7 @@
#include <linux/nospec.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <trace/events/syscalls.h>

#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
@@ -37,6 +38,55 @@

#ifdef CONFIG_X86_64

+/*
+ * do_ksyscall_64 - execute system call @nr from kernel code, taking the
+ * arguments from @regs and storing the handler's result in regs->ax.
+ * Seccomp runs if SYSCALL_WORK_SECCOMP is set; trace_sys_{enter,exit} fire.
+ * Returns false, without invoking a handler, for TS_COMPAT tasks, when
+ * __secure_computing() returns -1, or when @nr is out of range.
+ */
+bool do_ksyscall_64(int nr, struct pt_regs *regs)
+{
+	struct task_struct *task = current;
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+	/* A negative @nr converts to a huge value that fails the range check. */
+	unsigned int unr = nr;
+
+#ifdef CONFIG_IA32_EMULATION
+	if (task->thread_info.status & TS_COMPAT)
+		return false;
+#endif
+
+	if (work & SYSCALL_WORK_SECCOMP) {
+		struct seccomp_data sd;
+		unsigned long args[6];
+
+		sd.nr = nr;
+		sd.arch = AUDIT_ARCH_X86_64;
+		syscall_get_arguments(task, regs, args);
+		sd.args[0] = args[0];
+		sd.args[1] = args[1];
+		sd.args[2] = args[2];
+		sd.args[3] = args[3];
+		sd.args[4] = args[4];
+		sd.args[5] = args[5];
+		sd.instruction_pointer = regs->ip;
+		if (__secure_computing(&sd) == -1)
+			return false;
+	}
+
+	if (unlikely(unr >= NR_syscalls))
+		return false;
+
+	unr = array_index_nospec(unr, NR_syscalls);
+
+	trace_sys_enter(regs, unr);
+	regs->ax = sys_call_table[unr](regs);
+	trace_sys_exit(regs, syscall_get_return_value(task, regs));
+	return true;
+}
+EXPORT_SYMBOL_GPL(do_ksyscall_64);
+
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
/*
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 5b85987a5e97..6cde1ddeb50b 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,6 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}

+bool do_ksyscall_64(int nr, struct pt_regs *regs);
void do_syscall_64(struct pt_regs *regs, int nr);
void do_int80_syscall_32(struct pt_regs *regs);
long do_fast_syscall_32(struct pt_regs *regs);
--
2.37.1.359.gd136c6c3e2-goog