Re: [PATCH] tracing: reduce latency and remove percpu trace_seq

From: Steven Rostedt
Date: Thu Jun 03 2010 - 09:36:23 EST


On Thu, 2010-06-03 at 18:26 +0800, Lai Jiangshan wrote:
> __print_flags() and __print_symbolic() use percpu trace_seq:
>
> 1) Its memory is allocated at compile time, it wastes memory if we don't use tracing.
> 2) It is percpu data and it wastes more memory for multi-cpus system.
> 3) It disables preemption when it executes its core routine
> "trace_seq_printf(s, "%s: ", #call);" and introduces latency.
>
> So we move this trace_seq to struct trace_iterator.

I like the patch, except for one thing.

[ added Ted because he was the one needing this ]

>
> Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
> ---
> include/linux/ftrace_event.h | 5 +++--
> include/trace/ftrace.h | 12 +++---------
> kernel/trace/trace_output.c | 3 ---
> 3 files changed, 6 insertions(+), 14 deletions(-)
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 3167f2d..25e264d 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -11,8 +11,6 @@ struct trace_array;
> struct tracer;
> struct dentry;
>
> -DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
> -
> struct trace_print_flags {
> unsigned long mask;
> const char *name;
> @@ -58,6 +56,9 @@ struct trace_iterator {
> struct ring_buffer_iter *buffer_iter[NR_CPUS];
> unsigned long iter_flags;
>
> + /* trace_seq for __print_flags() and __print_symbolic() etc. */
> + struct trace_seq tmp_seq;
> +
> /* The below is zeroed out in pipe_read */
> struct trace_seq seq;
> struct trace_entry *ent;
> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
> index 5a64905..19369db 100644
> --- a/include/trace/ftrace.h
> +++ b/include/trace/ftrace.h
> @@ -145,7 +145,7 @@
> * struct trace_seq *s = &iter->seq;
> * struct ftrace_raw_<call> *field; <-- defined in stage 1
> * struct trace_entry *entry;
> - * struct trace_seq *p;
> + * struct trace_seq *p = &iter->tmp_seq;
> * int ret;
> *
> * entry = iter->ent;
> @@ -157,12 +157,10 @@
> *
> * field = (typeof(field))entry;
> *
> - * p = &get_cpu_var(ftrace_event_seq);
> * trace_seq_init(p);
> * ret = trace_seq_printf(s, "%s: ", <call>);
> * if (ret)
> * ret = trace_seq_printf(s, <TP_printk> "\n");
> - * put_cpu();
> * if (!ret)
> * return TRACE_TYPE_PARTIAL_LINE;
> *
> @@ -216,7 +214,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
> struct trace_seq *s = &iter->seq; \
> struct ftrace_raw_##call *field; \
> struct trace_entry *entry; \
> - struct trace_seq *p; \
> + struct trace_seq *p = &iter->tmp_seq; \
> int ret; \
> \
> event = container_of(trace_event, struct ftrace_event_call, \
> @@ -231,12 +229,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
> \
> field = (typeof(field))entry; \
> \
> - p = &get_cpu_var(ftrace_event_seq); \
> trace_seq_init(p); \
> ret = trace_seq_printf(s, "%s: ", event->name); \
> if (ret) \
> ret = trace_seq_printf(s, print); \
> - put_cpu(); \

The "print" in the trace_seq_printf() is the TP_printk() field of the
TRACE_EVENT. I believe that there were some events (ext4?) that did
things that required preemption disabled.

Ted,

Is this still the case?

-- Steve

> if (!ret) \
> return TRACE_TYPE_PARTIAL_LINE; \
> \
> @@ -255,7 +251,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
> struct trace_seq *s = &iter->seq; \
> struct ftrace_raw_##template *field; \
> struct trace_entry *entry; \
> - struct trace_seq *p; \
> + struct trace_seq *p = &iter->tmp_seq; \
> int ret; \
> \
> entry = iter->ent; \
> @@ -267,12 +263,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
> \
> field = (typeof(field))entry; \
> \
> - p = &get_cpu_var(ftrace_event_seq); \
> trace_seq_init(p); \
> ret = trace_seq_printf(s, "%s: ", #call); \
> if (ret) \
> ret = trace_seq_printf(s, print); \
> - put_cpu(); \
> if (!ret) \
> return TRACE_TYPE_PARTIAL_LINE; \
> \
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index 57c1b45..1ba64d3 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -16,9 +16,6 @@
>
> DECLARE_RWSEM(trace_event_mutex);
>
> -DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
> -EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
> -
> static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
>
> static int next_event_type = __TRACE_LAST_TYPE + 1;
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/