Re: [PATCH] tracing/function-graph-tracer: tracing only syscallsmode

From: Frederic Weisbecker
Date: Mon Jan 05 2009 - 18:58:03 EST


On Mon, Jan 05, 2009 at 02:11:08PM -0500, Steven Rostedt wrote:
>
> On Tue, 30 Dec 2008, Frederic Weisbecker wrote:
>
> > Impact: make more easy the syscalls tracing
> >
> > This patch extends to syscalls the features which let one to trace
> > only one ore more function (and those they call).
> > This way we can get rid of the workqueues, kernel threads, softirqs from
> > ksoftirqd and only have the syscall path on the trace.
> >
> > Note that hardirq that interrupt the syscalls are still traced. But we could
> > add an option to disable the interrupt tracing as well in the future.
> >
> > To use this new feature:
> >
> > echo function_graph > /debugfs/tracing/current_tracer
> > echo syscalls > /debugfs/tracing/set_graph_function
> >
> > Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> > ---
> > arch/x86/include/asm/thread_info.h | 4 +-
> > arch/x86/kernel/ptrace.c | 4 ++
> > include/linux/ftrace.h | 104 ++++++++++++++++++++----------------
> > kernel/trace/ftrace.c | 61 +++++++++++++++++++++
> > kernel/trace/trace.h | 3 +-
> > 5 files changed, 127 insertions(+), 49 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> > index efdf938..447fa17 100644
> > --- a/arch/x86/include/asm/thread_info.h
> > +++ b/arch/x86/include/asm/thread_info.h
> > @@ -94,6 +94,7 @@ struct thread_info {
> > #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
> > #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
> > #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
> > +#define TIF_SYSCALL_FTRACE 27 /* For ftrace syscalls tracing */
> >
> > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
> > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> > @@ -116,10 +117,11 @@ struct thread_info {
> > #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
> > #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
> > #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
> > +#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE)
> >
> > /* work to do in syscall_trace_enter() */
> > #define _TIF_WORK_SYSCALL_ENTRY \
> > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
> > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE |\
> > _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
> >
> > /* work to do in syscall_trace_leave() */
> > diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> > index 0a5df5f..1a6e72f 100644
> > --- a/arch/x86/kernel/ptrace.c
> > +++ b/arch/x86/kernel/ptrace.c
> > @@ -21,6 +21,7 @@
> > #include <linux/audit.h>
> > #include <linux/seccomp.h>
> > #include <linux/signal.h>
> > +#include <linux/ftrace.h>
> >
> > #include <asm/uaccess.h>
> > #include <asm/pgtable.h>
> > @@ -1397,6 +1398,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
> > {
> > long ret = 0;
> >
> > + /* Tell the function graph tracer we are entering a system call */
> > + ftrace_graph_syscall_enter();
> > +
>
> You would not mind me converting this to a tracepoint so that other
> tracers could tap into this.


Good idea!


> > /*
> > * If we stepped into a sysenter/syscall insn, it trapped in
> > * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index 677432b..4f22902 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -357,6 +357,52 @@ static inline void trace_power_end(struct power_trace *it) { }
> > #endif
> >
> >
> > +#ifdef CONFIG_TRACING
> > +#include <linux/sched.h>
> > +
> > +/* flags for current->trace */
> > +enum {
> > + TSK_TRACE_FL_TRACE_BIT = 0,
> > + TSK_TRACE_FL_GRAPH_BIT = 1,
> > +};
> > +enum {
> > + TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
> > + TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
> > +};
> > +
> > +static inline void set_tsk_trace_trace(struct task_struct *tsk)
> > +{
> > + set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> > +}
> > +
> > +static inline void clear_tsk_trace_trace(struct task_struct *tsk)
> > +{
> > + clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> > +}
> > +
> > +static inline int test_tsk_trace_trace(struct task_struct *tsk)
> > +{
> > + return tsk->trace & TSK_TRACE_FL_TRACE;
> > +}
> > +
> > +static inline void set_tsk_trace_graph(struct task_struct *tsk)
> > +{
> > + set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> > +}
> > +
> > +static inline void clear_tsk_trace_graph(struct task_struct *tsk)
> > +{
> > + clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> > +}
> > +
> > +static inline int test_tsk_trace_graph(struct task_struct *tsk)
> > +{
> > + return tsk->trace & TSK_TRACE_FL_GRAPH;
> > +}
> > +
> > +#endif /* CONFIG_TRACING */
> > +
> > +
> > /*
> > * Structure that defines an entry function trace.
> > */
> > @@ -430,7 +476,15 @@ static inline void unpause_graph_tracing(void)
> > {
> > atomic_dec(&current->tracing_graph_pause);
> > }
> > -#else
> > +
> > +extern bool ftrace_graph_syscalls;
> > +
> > +static inline void ftrace_graph_syscall_enter(void)
> > +{
> > + if (ftrace_graph_syscalls)
> > + set_tsk_trace_graph(current);
>
> We could eliminate this global variable with the tracepoint.


Not really, it is needed on the test when we trace functions on entry
to decide if we ignore or not.


> > +}
> > +#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
> >
> > #define __notrace_funcgraph
> > #define __irq_entry
> > @@ -445,51 +499,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
> >
> > static inline void pause_graph_tracing(void) { }
> > static inline void unpause_graph_tracing(void) { }
> > -#endif
> > -
> > -#ifdef CONFIG_TRACING
> > -#include <linux/sched.h>
> > -
> > -/* flags for current->trace */
> > -enum {
> > - TSK_TRACE_FL_TRACE_BIT = 0,
> > - TSK_TRACE_FL_GRAPH_BIT = 1,
> > -};
> > -enum {
> > - TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
> > - TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
> > -};
> > -
> > -static inline void set_tsk_trace_trace(struct task_struct *tsk)
> > -{
> > - set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> > -}
> > -
> > -static inline void clear_tsk_trace_trace(struct task_struct *tsk)
> > -{
> > - clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
> > -}
> > -
> > -static inline int test_tsk_trace_trace(struct task_struct *tsk)
> > -{
> > - return tsk->trace & TSK_TRACE_FL_TRACE;
> > -}
> > -
> > -static inline void set_tsk_trace_graph(struct task_struct *tsk)
> > -{
> > - set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> > -}
> > -
> > -static inline void clear_tsk_trace_graph(struct task_struct *tsk)
> > -{
> > - clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
> > -}
> > -
> > -static inline int test_tsk_trace_graph(struct task_struct *tsk)
> > -{
> > - return tsk->trace & TSK_TRACE_FL_GRAPH;
> > -}
> > -
> > -#endif /* CONFIG_TRACING */
> > +static inline void ftrace_graph_syscall_enter(void) { }
> > +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> >
> > #endif /* _LINUX_FTRACE_H */
> > diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> > index 2f32969..9ed22ee 100644
> > --- a/kernel/trace/ftrace.c
> > +++ b/kernel/trace/ftrace.c
> > @@ -1338,6 +1338,40 @@ static DEFINE_MUTEX(graph_lock);
> >
> > int ftrace_graph_count;
> > unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
> > +bool ftrace_graph_syscalls;
> > +static char *syscall_str = "syscalls";
> > +
> > +/* Tell ptrace that we trace all syscalls from all tasks */
> > +static void ftrace_graph_syscalls_start(void)
> > +{
> > + struct task_struct *g, *t;
> > + unsigned long flags;
> > +
> > + ftrace_graph_syscalls = true;
>
> If we keep this global, it would be better to have it after you set the
> TIF_SYSCALL_FTRACE flag, otherwise your traces might look funny. With some
> tasks being traced and others not.
>
> -- Steve


Nice catch. It's indeed racy.
I will resend a fixed patch when I find some time.
Thanks!


> > +
> > + read_lock_irqsave(&tasklist_lock, flags);
> > + do_each_thread(g, t) {
> > + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> > + } while_each_thread(g, t);
> > +
> > + read_unlock_irqrestore(&tasklist_lock, flags);
> > +}
> > +
> > +/* Remove the syscall flag trace for all tasks */
> > +static void ftrace_graph_syscalls_stop(void)
> > +{
> > + struct task_struct *g, *t;
> > + unsigned long flags;
> > +
> > + ftrace_graph_syscalls = false;
> > +
> > + read_lock_irqsave(&tasklist_lock, flags);
> > + do_each_thread(g, t) {
> > + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> > + } while_each_thread(g, t);
> > +
> > + read_unlock_irqrestore(&tasklist_lock, flags);
> > +}
> >
> > static void *
> > g_next(struct seq_file *m, void *v, loff_t *pos)
> > @@ -1359,6 +1393,13 @@ static void *g_start(struct seq_file *m, loff_t *pos)
> >
> > mutex_lock(&graph_lock);
> >
> > + /*
> > + * Syscalls tracing is special and is not on the
> > + * ftrace_graph_funcs tab. Print it as the first entry.
> > + */
> > + if (!*pos && ftrace_graph_syscalls)
> > + return syscall_str;
> > +
> > p = g_next(m, p, pos);
> >
> > return p;
> > @@ -1377,6 +1418,11 @@ static int g_show(struct seq_file *m, void *v)
> > if (!ptr)
> > return 0;
> >
> > + if (v == syscall_str) {
> > + seq_printf(m, "%s\n", syscall_str);
> > + return 0;
> > + }
> > +
> > kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
> >
> > seq_printf(m, "%s\n", str);
> > @@ -1403,6 +1449,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
> > if ((file->f_mode & FMODE_WRITE) &&
> > !(file->f_flags & O_APPEND)) {
> > ftrace_graph_count = 0;
> > + ftrace_graph_syscalls_stop();
> > memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
> > }
> >
> > @@ -1533,6 +1580,15 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
> > }
> > buffer[index] = 0;
> >
> > + /*
> > + * Catch the special syscall tracing request,
> > + * it is not enabled like other functions.
> > + */
> > + if (!strcmp(syscall_str, buffer)) {
> > + ftrace_graph_syscalls_start();
> > + goto out_syscalls;
> > + }
> > +
> > /* we allow only one at a time */
> > ret = ftrace_set_func(array, ftrace_graph_count, buffer);
> > if (ret)
> > @@ -1540,6 +1596,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
> >
> > ftrace_graph_count++;
> >
> > +out_syscalls:
> > file->f_pos += read;
> >
> > ret = read;
> > @@ -2091,6 +2148,10 @@ void ftrace_graph_init_task(struct task_struct *t)
> > t->curr_ret_stack = -1;
> > atomic_set(&t->tracing_graph_pause, 0);
> > atomic_set(&t->trace_overrun, 0);
> > +
> > + if (ftrace_graph_syscalls)
> > + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> > +
> > } else
> > t->ret_stack = NULL;
> > }
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index a8b624c..f372fbb 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -521,7 +521,8 @@ static inline int ftrace_graph_addr(unsigned long addr)
> > {
> > int i;
> >
> > - if (!ftrace_graph_count || test_tsk_trace_graph(current))
> > + if ((!ftrace_graph_count && !ftrace_graph_syscalls)
> > + || test_tsk_trace_graph(current))
> > return 1;
> >
> > for (i = 0; i < ftrace_graph_count; i++) {
> > --
> > 1.6.0.4
> >
> >
> >
> >

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/