[RFC PATCH v2 2/4] tracing/kprobe: Separate inc recursion count out of perf_trace_buf_prepare

From: He Kuang
Date: Thu Jul 02 2015 - 09:51:55 EST


Inside perf_trace_buf_prepare(), a recursion count is increased. The
count was first introduced by commit 444a2a3bcd6d ("tracing,
perf_events: Protect the buffer from recursion in perf") to protect
the percpu data buffer from being overwritten.

A future patch will enable eBPF programs to save data into the perf
trace buffer. To prevent the data buffer from being filled
recursively, the recursion count is now increased by the caller before
entering trace_call_bpf() and decreased again in case of error. In
that case, the new function perf_trace_buf_prepare_rctx() is used so
that the recursion count is not increased a second time.

Signed-off-by: He Kuang <hekuang@xxxxxxxxxx>
---
include/linux/ftrace_event.h | 2 ++
kernel/trace/trace_event_perf.c | 27 ++++++++++++++++++++++-----
kernel/trace/trace_kprobe.c | 28 ++++++++++++++++++++++------
3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f9ecf63..d54f11d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -614,6 +614,8 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs **regs, int *rctxp);
+extern void *perf_trace_buf_prepare_rctx(int size, unsigned short type,
+ struct pt_regs **regs, int rctx);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6fa484d..344b601 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -260,8 +260,9 @@ void perf_trace_del(struct perf_event *p_event, int flags)
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}

-void *perf_trace_buf_prepare(int size, unsigned short type,
- struct pt_regs **regs, int *rctxp)
+static void *__perf_trace_buf_prepare(int size, unsigned short type,
+ struct pt_regs **regs, int *rctxp,
+ bool update_rctx)
{
struct trace_entry *entry;
unsigned long flags;
@@ -276,9 +277,11 @@ void *perf_trace_buf_prepare(int size, unsigned short type,

pc = preempt_count();

- *rctxp = perf_swevent_get_recursion_context();
- if (*rctxp < 0)
- return NULL;
+ if (update_rctx) {
+ *rctxp = perf_swevent_get_recursion_context();
+ if (*rctxp < 0)
+ return NULL;
+ }

if (regs)
*regs = this_cpu_ptr(&__perf_regs[*rctxp]);
@@ -294,9 +297,23 @@ void *perf_trace_buf_prepare(int size, unsigned short type,

return raw_data;
}
+
+void *perf_trace_buf_prepare(int size, unsigned short type,
+ struct pt_regs **regs, int *rctxp)
+{
+ return __perf_trace_buf_prepare(size, type, regs, rctxp, true);
+}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
NOKPROBE_SYMBOL(perf_trace_buf_prepare);

+void *perf_trace_buf_prepare_rctx(int size, unsigned short type,
+ struct pt_regs **regs, int rctx)
+{
+ return __perf_trace_buf_prepare(size, type, regs, &rctx, false);
+}
+EXPORT_SYMBOL_GPL(perf_trace_buf_prepare_rctx);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare_rctx);
+
#ifdef CONFIG_FUNCTION_TRACER
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5600df8..16ad88e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1145,22 +1145,30 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
if (hlist_empty(head))
return;

- if (prog && !trace_call_bpf(prog, regs))
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
return;

+ if (prog && !trace_call_bpf(prog, regs))
+ goto out;
+
dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);

- entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
+ entry = perf_trace_buf_prepare_rctx(size, call->event.type, NULL, rctx);
if (!entry)
- return;
+ goto out;

entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
+
+ return;
+out:
+ perf_swevent_put_recursion_context(rctx);
}
NOKPROBE_SYMBOL(kprobe_perf_func);

@@ -1180,22 +1188,30 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
if (hlist_empty(head))
return;

- if (prog && !trace_call_bpf(prog, regs))
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
return;

+ if (prog && !trace_call_bpf(prog, regs))
+ goto out;
+
dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);

- entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
+ entry = perf_trace_buf_prepare_rctx(size, call->event.type, NULL, rctx);
if (!entry)
- return;
+ goto out;

entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
+
+ return;
+out:
+ perf_swevent_put_recursion_context(rctx);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif /* CONFIG_PERF_EVENTS */
--
1.8.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/