[PATCH 3/5] ftrace perf: Use ftrace_ops::private to store event pointer

From: Jiri Olsa
Date: Wed Mar 16 2016 - 10:34:59 EST


When the following commands are run concurrently:

# perf record -e ftrace:function -a -o krava.data sleep 10
# perf record -e ftrace:function --filter 'ip == SyS_read' ls

the latter one fails to apply its filter rule and stores
samples for all functions (in perf.data), as requested by
the first perf record, instead of just the SyS_read records.

The reason is that the tracepoint code by default
triggers all events that are registered for the tracepoint.

The ftrace:function event is special, however: the
ftrace_ops itself carries a filter, so only the event that
owns the ftrace_ops is eligible to be triggered.

Fix this by using the ftrace_ops::private value to keep
the perf_event pointer. This way we don't need to search
for the triggered event (as the tracepoint handler does)
and can store the sample directly.

Acked-by: Namhyung Kim <namhyung@xxxxxxxxxx>
Suggested-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
---
include/linux/perf_event.h | 3 +++
kernel/events/core.c | 22 ++++++++++++++++++++++
kernel/trace/trace_event_perf.c | 10 +++-------
3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a9d8cab18b00..a330dc06d90d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1008,6 +1008,9 @@ extern void perf_tp_event(u64 addr, u64 count, void *record,
int entry_size, struct pt_regs *regs,
struct hlist_head *head, int rctx,
struct task_struct *task);
+void perf_function_event(struct perf_event *event,
+ void *record, int entry_size,
+ struct pt_regs *regs);
extern void perf_bp_event(struct perf_event *event, void *data);

#ifndef perf_misc_flags
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ca68fdcf47ce..18da90859c17 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7063,6 +7063,28 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
}
}

+#ifdef CONFIG_FUNCTION_TRACER
+void perf_function_event(struct perf_event *event,
+ void *record, int entry_size,
+ struct pt_regs *regs)
+
+{
+ struct perf_sample_data data;
+ struct perf_raw_record raw = {
+ .size = entry_size,
+ .data = record,
+ };
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_sample_data_init(&data, 0, 0);
+ data.raw = &raw;
+
+ perf_swevent_event(event, 1, &data, regs);
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
#else

static inline void perf_tp_register(void)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6e6d4052f398..37de72405a64 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -331,14 +331,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
struct ftrace_entry *entry;
- struct hlist_head *head;
struct pt_regs regs;
int rctx;

- head = this_cpu_ptr(event_function.perf_events);
- if (hlist_empty(head))
- return;
-
#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
sizeof(u64)) - sizeof(u32))

@@ -352,8 +347,8 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,

entry->ip = ip;
entry->parent_ip = parent_ip;
- perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
- 1, &regs, head, NULL);
+ perf_function_event(ops->private, entry, ENTRY_SIZE, &regs);
+ perf_swevent_put_recursion_context(rctx);

#undef ENTRY_SIZE
}
@@ -362,6 +357,7 @@ static int perf_ftrace_function_register(struct perf_event *event)
{
struct ftrace_ops *ops = &event->ftrace_ops;

+ ops->private = event;
ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU;
ops->func = perf_ftrace_function_call;
return register_ftrace_function(ops);
--
2.4.3