[PATCH] x86/entry/64: Remove duplicate syscall table for fast path
From: Andy Lutomirski
Date: Wed Dec 09 2015 - 14:30:54 EST
Instead of using a duplicate syscall table for the fast path, create
stubs for the syscalls that need pt_regs that dispatch based on the
call site.
I think that this is very likely to introduce a mis-predicted branch
in all such syscalls. I think that's fine -- all of them are
already very slow.
Heavily based on a patch from Brian Gerst [1].
[1] http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@xxxxxxxxx
Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
Cc: the arch/x86 maintainers <x86@xxxxxxxxxx>
Cc: Linux Kernel Mailing List <linux-kernel@xxxxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: FrÃdÃric Weisbecker <fweisbec@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
Brian, here's a counter-proposal. It's derived from your patch, but it works
differently.
If people like this, I'll send a new version of the whole series that includes
it at the end.
arch/x86/entry/entry_64.S | 49 ++++++++++++++++++++++++++++++++++++++-------
arch/x86/entry/syscall_64.c | 25 +++++------------------
2 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ab5362f241d..16779b52419e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -188,7 +188,15 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
- call *sys_call_table_fastpath_64(, %rax, 8)
+
+ /*
+ * This call instruction is handled specially in stub_ptregs_64.
+ * It might end up jumping to the slow path. If it jumps, rax and
+ * r11 are clobbered.
+ */
+ call *sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
movq %rax, RAX(%rsp)
1:
@@ -306,15 +314,42 @@ END(entry_SYSCALL_64)
ENTRY(stub_ptregs_64)
/*
- * Syscalls marked as needing ptregs that go through the fast path
- * land here. We transfer to the slow path.
+ * Syscalls marked as needing ptregs land here.
+ * If we are on the fast path, we need to save the extra regs.
+ * If we are on the slow path, the extra regs are already saved.
+ *
+ * RAX stores a pointer to the C function implementing the syscall.
+ *
+ * We can safely clobber RAX (clobbered by return value regardless)
+ * and R11 (owned by callee and never stores an argument) regardless
+ * of which path we take.
*/
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- addq $8, %rsp
- jmp entry_SYSCALL64_slow_path
+ leaq .Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
+ cmpq %r11, (%rsp)
+ jne 1f
+
+ /* Called from fast path -- pop return address and jump to slow path */
+ popq %rax
+ jmp entry_SYSCALL64_slow_path /* called from fast path */
+
+1:
+ /* Called from C */
+ jmp *%rax /* called from C */
END(stub_ptregs_64)
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+ leaq \func(%rip), %rax
+ jmp stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
+
/*
* A newly forked process directly context switches into this address.
*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 601745c667ce..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,11 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
@@ -22,21 +25,3 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
-
-#undef __SYSCALL_64
-
-extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
-#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
-#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
-
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
-
-asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
- /*
- * Smells like a compiler bug -- it doesn't work
- * when the & below is removed.
- */
- [0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
--
2.5.0
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/