[PATCH 3/3] context_tracking,x86: remove extraneous irq disable & enable from context tracking on syscall entry

From: riel
Date: Thu Apr 30 2015 - 17:24:35 EST


From: Rik van Riel <riel@xxxxxxxxxx>

On syscall entry with nohz_full on, we enable interrupts, call user_exit,
disable interrupts, do something, re-enable interrupts, and go on our
merry way.

Profiling shows that a large amount of the nohz_full overhead comes
from the extraneous disabling and re-enabling of interrupts. Andy
suggested simply not enabling interrupts until after the context
tracking code has done its thing, which allows us to skip a whole
interrupt disable & re-enable cycle.

This patch builds on top of these patches by Paolo:
https://lkml.org/lkml/2015/4/28/188
https://lkml.org/lkml/2015/4/29/139

Together with the patch I posted earlier this week (linked below), the
syscall path on a nohz_full cpu seems to be about 10% faster:
https://lkml.org/lkml/2015/4/24/394

My test is a simple microbenchmark that calls getpriority() in a loop
10 million times:

                run time    system time
vanilla         5.49s       2.08s
__acct patch    5.21s       1.92s
both patches    4.88s       1.71s

Cc: Frederic Weisbecker <fweisbec@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Suggested-by: Andy Lutomirski <amluto@xxxxxxxxxxxxxx>
Signed-off-by: Rik van Riel <riel@xxxxxxxxxx>
---
arch/x86/kernel/entry_32.S | 4 ++--
arch/x86/kernel/entry_64.S | 4 ++--
arch/x86/kernel/ptrace.c | 6 +++++-
include/linux/context_tracking.h | 11 +++++++++++
4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 1c309763e321..0bdf8c7057e4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -406,7 +406,6 @@ ENTRY(ia32_sysenter_target)

pushl_cfi %eax
SAVE_ALL
- ENABLE_INTERRUPTS(CLBR_NONE)

/*
* Load the potential sixth argument from user stack.
@@ -424,6 +423,7 @@ ENTRY(ia32_sysenter_target)

testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz sysenter_audit
+ ENABLE_INTERRUPTS(CLBR_NONE)
sysenter_do_call:
cmpl $(NR_syscalls), %eax
jae sysenter_badsys
@@ -647,7 +647,7 @@ END(work_pending)
syscall_trace_entry:
movl $-ENOSYS,PT_EAX(%esp)
movl %esp, %eax
- call syscall_trace_enter
+ call syscall_trace_enter /* returns with irqs enabled */
/* What it returned is what we'll actually use. */
cmpl $(NR_syscalls), %eax
jnae syscall_call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 02c2eff7478d..f7751da7b53e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -228,7 +228,6 @@ GLOBAL(system_call_after_swapgs)
* task preemption. We must enable interrupts only after we're done
* with using rsp_scratch:
*/
- ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %r11 /* pt_regs->flags */
pushq_cfi $__USER_CS /* pt_regs->cs */
pushq_cfi %rcx /* pt_regs->ip */
@@ -248,6 +247,7 @@ GLOBAL(system_call_after_swapgs)

testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz tracesys
+ ENABLE_INTERRUPTS(CLBR_NONE)
system_call_fastpath:
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
@@ -313,7 +313,7 @@ GLOBAL(system_call_after_swapgs)
tracesys:
movq %rsp, %rdi
movl $AUDIT_ARCH_X86_64, %esi
- call syscall_trace_enter_phase1
+ call syscall_trace_enter_phase1 /* returns with interrupts enabled */
test %rax, %rax
jnz tracesys_phase2 /* if needed, run the slow path */
RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a7bc79480719..066c86d0b68c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1456,6 +1456,8 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
*
* NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
* are fully functional.
+ * Called with IRQs disabled, to be enabled after the context tracking
+ * code has run.
*
* For phase 2's benefit, our return value is:
* 0: resume the syscall
@@ -1477,10 +1479,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
- user_exit();
+ user_exit_irqsoff();
work &= ~_TIF_NOHZ;
}

+ local_irq_enable();
+
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 5d3719aed958..dc3b169b2b70 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -25,12 +25,23 @@ static inline void user_enter(void)
context_tracking_enter(CONTEXT_USER);

}
+
static inline void user_exit(void)
{
if (context_tracking_is_enabled())
context_tracking_exit(CONTEXT_USER);
}

+/* Called with IRQs already disabled. */
+static inline void user_exit_irqsoff(void)
+{
+ if (in_interrupt())
+ return;
+
+ if (context_tracking_is_enabled())
+ __context_tracking_exit(CONTEXT_USER);
+}
+
static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/