Re: [PATCH] new irq tracer

From: Jason Baron
Date: Fri Feb 20 2009 - 14:53:55 EST


On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote:
>
> > > I really am having a difficult time seeing the use in such narrow
> > > tracers.
> >
> > Part of the problem may come from defining "tracers" as something
> > limited to ftrace engines. Once such tracepoints are in the kernel,
> > more powerful analytical tools may be attached to them.
>
> ftrace graph tracer is by far the most powerful thing I've seen, there's
> nothing limiting about ftrace.
>
> What is limiting are these puny little tracers that have no real value.
>
> A much better purpose for these tracepoints would be augmenting data in
> existing tracers like the graph/function/sched tracer.
>

ok...using these two tracepoints i've augmented the graph tracer to add
a new column that prints the irq # and handler we're in, or -1
otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output:


1) #-1:none 2.323 us | native_apic_mem_write();
1) #-1:none + 21.891 us | }
1) #-1:none | handle_IRQ_event() {
1) #29:ahci 1.938 us | irq_to_desc();
1) #29:ahci 2.626 us | runqueue_is_locked();
1) #29:ahci + 21.173 us | }
.
.
.
1) #29:ahci + 32.047 us | }
1) #29:ahci + 43.746 us | }
1) #29:ahci + 51.634 us | }
1) #29:ahci + 61.954 us | }
1) #-1:none 1.014 us | runqueue_is_locked();
1) #-1:none | __wake_up() {
1) #-1:none 0.920 us | _spin_lock_irqsave();
1) #-1:none 1.028 us | __wake_up_common();


This seems to work pretty well - although it does get confused by
certain preemption events which it can't handle...I think this can make
the logs more readable. I can probably get it working if ppl thinks its
valuable...or have better ways of implementing it. Code is below.

thanks,

-Jason


diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 0000000..ecec94a
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,25 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+enum {
+ IRQ_ENTRY = 0,
+ IRQ_EXIT = 1,
+};
+
+struct irq_trace {
+ int type;
+ int irq;
+ char irq_name[10];
+};
+
+DECLARE_TRACE(irq_entry,
+ TPPROTO(unsigned int id),
+ TPARGS(id));
+DECLARE_TRACE(irq_exit,
+ TPPROTO(unsigned int id, irqreturn_t retval),
+ TPARGS(id, retval));
+
+#endif /* _TRACE_IRQ_H */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fdff380..34053b5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,7 @@
#include <linux/rculist.h>
#include <linux/hash.h>
#include <linux/bootmem.h>
+#include <trace/irq.h>

#include "internals.h"

@@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
return IRQ_NONE;
}

+DEFINE_TRACE(irq_entry);
+DEFINE_TRACE(irq_exit);
+
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number
@@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
local_irq_enable_in_hardirq();

do {
+ trace_irq_entry(irq);
ret = action->handler(irq, action->dev_id);
+ trace_irq_exit(irq, ret);
if (ret == IRQ_HANDLED)
status |= action->flags;
retval |= ret;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 627090b..58b9f7e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_IRQ_TRACER) += trace_irq.o

libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dbff020..6a34aaa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
#include <trace/boot.h>
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#include <trace/irq.h>

enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -33,6 +34,7 @@ enum trace_type {
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_POWER,
+ TRACE_IRQ,
TRACE_BLK,

__TRACE_LAST_TYPE,
@@ -173,6 +175,11 @@ struct trace_power {
struct power_trace state_data;
};

+struct trace_irq {
+ struct trace_entry ent;
+ struct irq_trace irq_data;
+};
+
struct kmemtrace_alloc_entry {
struct trace_entry ent;
enum kmemtrace_type_id type_id;
@@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \
IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 519a0ca..4c4c777 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -10,10 +10,19 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
+#include <trace/irq.h>

#include "trace.h"
#include "trace_output.h"

+
+struct per_cpu_irq_data {
+ int irq_num;
+ char action_str[10];
+};
+static struct per_cpu_irq_data cpu_irq_data[NR_CPUS];
+static struct trace_array *graph_trace_tr;
+
#define TRACE_GRAPH_INDENT 2

/* Flag options */
@@ -23,6 +32,7 @@
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
+#define TRACE_GRAPH_PRINT_IRQ 0X40

static struct tracer_opt trace_opts[] = {
/* Display overruns? (for self-debug purpose) */
@@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
/* Display absolute time of an entry */
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
+ /* Display irq number, -1 otherwise */
+ { TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) },
{ } /* Empty entry */
};

@@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = {

static int graph_trace_init(struct trace_array *tr)
{
+ int i;
int ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry);
if (ret)
return ret;
+ graph_trace_tr = tr;
+ for (i=0;i<NR_CPUS;i++) {
+ cpu_irq_data[i].irq_num = -1;
+ sprintf(cpu_irq_data[i].action_str, "none");
+ cpu_irq_data[i].action_str[4] = '\0';
+ }
tracing_start_cmdline_record();

return 0;
@@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr)

static void graph_trace_reset(struct trace_array *tr)
{
+ graph_trace_tr = tr;
tracing_stop_cmdline_record();
unregister_ftrace_graph();
}
@@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu)
return TRACE_TYPE_HANDLED;
}

+#define TRACE_GRAPH_IRQ_LENGTH 14
+
+static enum print_line_t
+print_graph_irqnum(struct trace_seq *s, int cpu)
+{
+ char irq_str[TRACE_GRAPH_IRQ_LENGTH];
+ int len, ret;
+
+ len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str);
+ ret = trace_seq_printf(s, "%s", irq_str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ while (len < TRACE_GRAPH_IRQ_LENGTH) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ len++;
+ }
+ return TRACE_TYPE_HANDLED;
+}
+
#define TRACE_GRAPH_PROCINFO_LENGTH 14

static enum print_line_t
@@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}

+ /* irq */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+ ret = print_graph_irqnum(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Proc */
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
ret = print_graph_proc(s, ent->pid);
@@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}

+ /* irq */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+ ret = print_graph_irqnum(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Proc */
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
ret = print_graph_proc(s, ent->pid);
@@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}

+ /* irq */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) {
+ ret = print_graph_irqnum(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Proc */
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
ret = print_graph_proc(s, ent->pid);
@@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter)
trace_assign_type(field, entry);
return print_graph_comment(field, s, entry, iter);
}
+ case TRACE_IRQ: {
+ struct trace_irq *field;
+ int cpu = iter->cpu;
+
+ trace_assign_type(field, entry);
+ if (field->irq_data.type == IRQ_ENTRY) {
+ cpu_irq_data[cpu].irq_num = field->irq_data.irq;
+ strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9);
+ cpu_irq_data[cpu].action_str[9] = '\0';
+ } else {
+ cpu_irq_data[cpu].irq_num = -1;
+ sprintf(cpu_irq_data[cpu].action_str, "none");
+ cpu_irq_data[cpu].action_str[4] = '\0';
+ }
+ return TRACE_TYPE_HANDLED;
+ }
default:
return TRACE_TYPE_UNHANDLED;
}
@@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter)
percpu_free(iter->private);
}

+static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
+{
+ struct ring_buffer_event *event;
+ struct trace_irq *entry;
+
+ event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ entry->irq_data.type = IRQ_EXIT;
+ trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
+}
+
+static void probe_irq_entry(unsigned int id)
+{
+ struct ring_buffer_event *event;
+ struct trace_irq *entry;
+ struct irq_desc *desc;
+
+ event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ entry->irq_data.irq = id;
+ entry->irq_data.type = IRQ_ENTRY;
+ desc = irq_to_desc(id);
+ strncpy(entry->irq_data.irq_name, desc->action->name, 9);
+ entry->irq_data.irq_name[9] = '\0';
+ trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0);
+}
+
+static int graph_trace_set_flag(u32 old_flags, u32 bit, int set)
+{
+ int ret = 0;
+
+ if (bit == TRACE_GRAPH_PRINT_IRQ) {
+ /* do nothing if already set */
+ //if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ))
+ // return 0;
+ if (set) {
+ ret = register_trace_irq_entry(probe_irq_entry);
+ if (ret) {
+ pr_info("irq trace: irq entry tracepoint failed to register\n");
+ return ret;
+ }
+ ret = register_trace_irq_exit(probe_irq_exit);
+ if (ret) {
+ pr_info("irq trace: irq exit tracepoint failed to register\n");
+ unregister_trace_irq_entry(probe_irq_entry);
+ return ret;
+ }
+ } else {
+ unregister_trace_irq_entry(probe_irq_entry);
+ unregister_trace_irq_exit(probe_irq_exit);
+ }
+ }
+ return ret;
+}
+
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
.open = graph_trace_open,
@@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = {
.print_line = print_graph_function,
.print_header = print_graph_headers,
.flags = &tracer_flags,
+ .set_flag = graph_trace_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function_graph,
#endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/