[RFC patch 18/20] LTTng Linux Kernel Trace Thread Flag x86

From: Mathieu Desnoyers
Date: Sun Mar 15 2009 - 16:26:34 EST


Add a thread flag to activate system-wide syscall tracing.

Make x86 support asynchronous setting of the TIF_KERNEL_TRACE flag in
entry_32.S/entry_64.S.

x86_64 :

When the flag is inactive upon syscall entry and concurrently activated before
exit, we seem to reach a state where the top of stack is incorrect upon return
to user space.

Fix this by fixing up the top of stack and jumping to int_ret_from_sys_call if
we detect that the thread flags have been modified.

We make sure that the thread flags value read is coherent between our new test
and the ALLWORK_MASK test by first saving it in a register that is used for
both comparisons.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxx>
CC: Andi Kleen <ak@xxxxxx>
CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: H. Peter Anvin <hpa@xxxxxxxxx>
---
arch/x86/include/asm/thread_info.h | 9 ++++++---
arch/x86/kernel/entry_32.S | 3 ++-
arch/x86/kernel/entry_64.S | 12 ++++++++++++
3 files changed, 20 insertions(+), 4 deletions(-)

Index: linux-2.6-lttng/arch/x86/include/asm/thread_info.h
===================================================================
--- linux-2.6-lttng.orig/arch/x86/include/asm/thread_info.h 2009-03-15 15:51:26.000000000 -0400
+++ linux-2.6-lttng/arch/x86/include/asm/thread_info.h 2009-03-15 15:57:19.000000000 -0400
@@ -81,6 +81,7 @@ struct thread_info {
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_KERNEL_TRACE 9 /* kernel trace active */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
@@ -103,6 +104,7 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
@@ -117,17 +119,18 @@ struct thread_info {

/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+ (_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE | _TIF_SYSCALL_EMU | \
_TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)

/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+ (_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SINGLESTEP)

/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
(0x0000FFFF & \
- ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
+ ~(_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE|_TIF_SYSCALL_AUDIT| \
_TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))

/* work to do on any return to user space */
Index: linux-2.6-lttng/arch/x86/kernel/entry_32.S
===================================================================
--- linux-2.6-lttng.orig/arch/x86/kernel/entry_32.S 2009-03-15 15:51:26.000000000 -0400
+++ linux-2.6-lttng/arch/x86/kernel/entry_32.S 2009-03-15 15:57:19.000000000 -0400
@@ -571,7 +571,8 @@ END(syscall_trace_entry)
# perform syscall exit tracing
ALIGN
syscall_exit_work:
- testb $_TIF_WORK_SYSCALL_EXIT, %cl
+ /* Note, _TIF_KERNEL_TRACE is bit number 9, and so it needs testw and not testb */
+ testw $_TIF_WORK_SYSCALL_EXIT, %cx
jz work_pending
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
Index: linux-2.6-lttng/arch/x86/kernel/entry_64.S
===================================================================
--- linux-2.6-lttng.orig/arch/x86/kernel/entry_64.S 2009-03-15 15:51:19.000000000 -0400
+++ linux-2.6-lttng/arch/x86/kernel/entry_64.S 2009-03-15 15:57:19.000000000 -0400
@@ -530,6 +530,8 @@ sysret_check:
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
+ testl $_TIF_KERNEL_TRACE,%edx /* Re-read : concurrently changed */
+ jnz ret_from_sys_call_trace
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
@@ -541,6 +543,16 @@ sysret_careful:
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check

+ret_from_sys_call_trace:
+ TRACE_IRQS_ON
+ sti
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %rdi
+ movq %rsp,%rdi
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/