[PATCH] tracing/function-graph-tracer: tracing only syscalls mode

From: Frederic Weisbecker
Date: Tue Dec 30 2008 - 19:29:42 EST


Impact: make more easy the syscalls tracing

This patch extends to syscalls the features which let one to trace
only one ore more function (and those they call).
This way we can get rid of the workqueues, kernel threads, softirqs from
ksoftirqd and only have the syscall path on the trace.

Note that hardirq that interrupt the syscalls are still traced. But we could
add an option to disable the interrupt tracing as well in the future.

To use this new feature:

echo function_graph > /debugfs/tracing/current_tracer
echo syscalls > /debugfs/tracing/set_graph_function

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
---
arch/x86/include/asm/thread_info.h | 4 +-
arch/x86/kernel/ptrace.c | 4 ++
include/linux/ftrace.h | 104 ++++++++++++++++++++----------------
kernel/trace/ftrace.c | 61 +++++++++++++++++++++
kernel/trace/trace.h | 3 +-
5 files changed, 127 insertions(+), 49 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index efdf938..447fa17 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -94,6 +94,7 @@ struct thread_info {
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
+#define TIF_SYSCALL_FTRACE 27 /* For ftrace syscalls tracing */

#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -116,10 +117,11 @@ struct thread_info {
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
+#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)

/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE |\
_TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)

/* work to do in syscall_trace_leave() */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f..1a6e72f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
+#include <linux/ftrace.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1397,6 +1398,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;

+ /* Tell the function graph tracer we are entering a system call */
+ ftrace_graph_syscall_enter();
+
/*
* If we stepped into a sysenter/syscall insn, it trapped in
* kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b..4f22902 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -357,6 +357,52 @@ static inline void trace_power_end(struct power_trace *it) { }
#endif


+#ifdef CONFIG_TRACING
+#include <linux/sched.h>
+
+/* flags for current->trace */
+enum {
+ TSK_TRACE_FL_TRACE_BIT = 0,
+ TSK_TRACE_FL_GRAPH_BIT = 1,
+};
+enum {
+ TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
+ TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
+};
+
+static inline void set_tsk_trace_trace(struct task_struct *tsk)
+{
+ set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
+}
+
+static inline void clear_tsk_trace_trace(struct task_struct *tsk)
+{
+ clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
+}
+
+static inline int test_tsk_trace_trace(struct task_struct *tsk)
+{
+ return tsk->trace & TSK_TRACE_FL_TRACE;
+}
+
+static inline void set_tsk_trace_graph(struct task_struct *tsk)
+{
+ set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
+}
+
+static inline void clear_tsk_trace_graph(struct task_struct *tsk)
+{
+ clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
+}
+
+static inline int test_tsk_trace_graph(struct task_struct *tsk)
+{
+ return tsk->trace & TSK_TRACE_FL_GRAPH;
+}
+
+#endif /* CONFIG_TRACING */
+
+
/*
* Structure that defines an entry function trace.
*/
@@ -430,7 +476,15 @@ static inline void unpause_graph_tracing(void)
{
atomic_dec(&current->tracing_graph_pause);
}
-#else
+
+extern bool ftrace_graph_syscalls;
+
+static inline void ftrace_graph_syscall_enter(void)
+{
+ if (ftrace_graph_syscalls)
+ set_tsk_trace_graph(current);
+}
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */

#define __notrace_funcgraph
#define __irq_entry
@@ -445,51 +499,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)

static inline void pause_graph_tracing(void) { }
static inline void unpause_graph_tracing(void) { }
-#endif
-
-#ifdef CONFIG_TRACING
-#include <linux/sched.h>
-
-/* flags for current->trace */
-enum {
- TSK_TRACE_FL_TRACE_BIT = 0,
- TSK_TRACE_FL_GRAPH_BIT = 1,
-};
-enum {
- TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
- TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
-};
-
-static inline void set_tsk_trace_trace(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_trace(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_trace(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_TRACE;
-}
-
-static inline void set_tsk_trace_graph(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_graph(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_graph(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_GRAPH;
-}
-
-#endif /* CONFIG_TRACING */
+static inline void ftrace_graph_syscall_enter(void) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

#endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2f32969..9ed22ee 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1338,6 +1338,40 @@ static DEFINE_MUTEX(graph_lock);

int ftrace_graph_count;
unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+bool ftrace_graph_syscalls;
+static char *syscall_str = "syscalls";
+
+/* Tell ptrace that we trace all syscalls from all tasks */
+static void ftrace_graph_syscalls_start(void)
+{
+ struct task_struct *g, *t;
+ unsigned long flags;
+
+ ftrace_graph_syscalls = true;
+
+ read_lock_irqsave(&tasklist_lock, flags);
+ do_each_thread(g, t) {
+ set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
+ } while_each_thread(g, t);
+
+ read_unlock_irqrestore(&tasklist_lock, flags);
+}
+
+/* Remove the syscall flag trace for all tasks */
+static void ftrace_graph_syscalls_stop(void)
+{
+ struct task_struct *g, *t;
+ unsigned long flags;
+
+ ftrace_graph_syscalls = false;
+
+ read_lock_irqsave(&tasklist_lock, flags);
+ do_each_thread(g, t) {
+ clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
+ } while_each_thread(g, t);
+
+ read_unlock_irqrestore(&tasklist_lock, flags);
+}

static void *
g_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1359,6 +1393,13 @@ static void *g_start(struct seq_file *m, loff_t *pos)

mutex_lock(&graph_lock);

+ /*
+ * Syscalls tracing is special and is not on the
+ * ftrace_graph_funcs tab. Print it as the first entry.
+ */
+ if (!*pos && ftrace_graph_syscalls)
+ return syscall_str;
+
p = g_next(m, p, pos);

return p;
@@ -1377,6 +1418,11 @@ static int g_show(struct seq_file *m, void *v)
if (!ptr)
return 0;

+ if (v == syscall_str) {
+ seq_printf(m, "%s\n", syscall_str);
+ return 0;
+ }
+
kallsyms_lookup(*ptr, NULL, NULL, NULL, str);

seq_printf(m, "%s\n", str);
@@ -1403,6 +1449,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
if ((file->f_mode & FMODE_WRITE) &&
!(file->f_flags & O_APPEND)) {
ftrace_graph_count = 0;
+ ftrace_graph_syscalls_stop();
memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
}

@@ -1533,6 +1580,15 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
}
buffer[index] = 0;

+ /*
+ * Catch the special syscall tracing request,
+ * it is not enabled like other functions.
+ */
+ if (!strcmp(syscall_str, buffer)) {
+ ftrace_graph_syscalls_start();
+ goto out_syscalls;
+ }
+
/* we allow only one at a time */
ret = ftrace_set_func(array, ftrace_graph_count, buffer);
if (ret)
@@ -1540,6 +1596,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,

ftrace_graph_count++;

+out_syscalls:
file->f_pos += read;

ret = read;
@@ -2091,6 +2148,10 @@ void ftrace_graph_init_task(struct task_struct *t)
t->curr_ret_stack = -1;
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
+
+ if (ftrace_graph_syscalls)
+ set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
+
} else
t->ret_stack = NULL;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a8b624c..f372fbb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -521,7 +521,8 @@ static inline int ftrace_graph_addr(unsigned long addr)
{
int i;

- if (!ftrace_graph_count || test_tsk_trace_graph(current))
+ if ((!ftrace_graph_count && !ftrace_graph_syscalls)
+ || test_tsk_trace_graph(current))
return 1;

for (i = 0; i < ftrace_graph_count; i++) {
--
1.6.0.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/