[RFC PATCH v2 3/4] bpf: Introduce function for outputting data to perf event
From: He Kuang
Date: Thu Jul 02 2015 - 09:52:08 EST
Add function to receive data from eBPF programs and fill that into
perf trace buffer of the current context. In the previous patch we made
sure that the recursion counter protecting perf trace buffer is
checked when bpf_prog is executed, so here we can safely fill the
trace buffer. The data is temporarily stored at the end of
perf_trace_buf; the last 4 bytes of the buffer are used as a valid flag
and contain the temporary buffer length.
In order to get the corresponding trace buffer of the context, new
function perf_swevent_current_context_type() is added; this function
only gets the current context type but does not increase the recursion
count.
Signed-off-by: He Kuang <hekuang@xxxxxxxxxx>
---
include/linux/ftrace_event.h | 2 ++
include/linux/perf_event.h | 2 ++
include/uapi/linux/bpf.h | 3 +++
kernel/events/core.c | 6 ++++++
kernel/events/internal.h | 17 ++++++++++-------
kernel/trace/bpf_trace.c | 29 +++++++++++++++++++++++++++++
kernel/trace/trace_event_perf.c | 29 +++++++++++++++++++++++++++++
samples/bpf/bpf_helpers.h | 2 ++
8 files changed, 83 insertions(+), 7 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d54f11d..1c1f3ad 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -616,6 +616,8 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs **regs, int *rctxp);
extern void *perf_trace_buf_prepare_rctx(int size, unsigned short type,
struct pt_regs **regs, int rctx);
+extern void *perf_trace_buf_prepare_rctx_tail(int size, int rctx);
+extern void *get_perf_trace_buf(int rctx);
static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a204d52..984c89c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -939,6 +939,7 @@ extern unsigned int perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
unsigned int len);
+extern int perf_swevent_current_context_type(void);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
@@ -995,6 +996,7 @@ static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
+static inline int perf_swevent_current_context_type(void) { return -1; }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline u64 perf_swevent_set_period(struct perf_event *event) { return 0; }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a9ebdf5..13d3e46 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -210,6 +210,9 @@ enum bpf_func_id {
* Return: 0 on success
*/
BPF_FUNC_l4_csum_replace,
+
+ /* int bpf_output_data(void *src, int size) */
+ BPF_FUNC_output_data,
__BPF_FUNC_MAX_ID,
};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9e0773d..0224d5b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6376,6 +6376,12 @@ end:
DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
+int perf_swevent_current_context_type(void)
+{
+ return current_context_type();
+}
+EXPORT_SYMBOL_GPL(perf_swevent_current_context_type);
+
int perf_swevent_get_recursion_context(void)
{
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2deb24c..5cabce5 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -175,18 +175,21 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
-static inline int get_recursion_context(int *recursion)
+static inline int current_context_type(void)
{
- int rctx;
-
if (in_nmi())
- rctx = 3;
+ return 3;
else if (in_irq())
- rctx = 2;
+ return 2;
else if (in_softirq())
- rctx = 1;
+ return 1;
else
- rctx = 0;
+ return 0;
+}
+
+static inline int get_recursion_context(int *recursion)
+{
+ int rctx = current_context_type();
if (recursion[rctx])
return -1;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d56ce5..9159b5e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -79,6 +79,33 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
+static u64 bpf_output_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ void *src = (void *) (long) r1;
+ int size = (int) r2;
+ void *buf;
+ int rctx = perf_swevent_current_context_type();
+
+ if (rctx < 0)
+ return -EINVAL;
+
+ buf = perf_trace_buf_prepare_rctx_tail(size, rctx);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, src, size);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_output_data_proto = {
+ .func = bpf_output_data,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_STACK,
+ .arg2_type = ARG_CONST_STACK_SIZE,
+};
+
static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
/* NMI safe access to clock monotonic */
@@ -170,6 +197,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_map_delete_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
+ case BPF_FUNC_output_data:
+ return &bpf_output_data_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 344b601..2eeb59b 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -260,6 +260,35 @@ void perf_trace_del(struct perf_event *p_event, int flags)
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}
+void *perf_trace_buf_prepare_rctx_tail(int size, int rctx)
+{
+ char *raw_data;
+
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
+
+ if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+ "perf buffer not large enough"))
+ return NULL;
+
+ raw_data = this_cpu_ptr(perf_trace_buf[rctx]);
+ raw_data += (PERF_MAX_TRACE_SIZE - sizeof(u32));
+
+ /* The last 4 bytes are the raw_data size and are used as a valid flag */
+ *(u32 *)raw_data = size;
+ raw_data -= size;
+
+ return raw_data;
+}
+EXPORT_SYMBOL_GPL(perf_trace_buf_prepare_rctx_tail);
+NOKPROBE_SYMBOL(perf_trace_buf_prepare_rctx_tail);
+
+void *get_perf_trace_buf(int rctx)
+{
+ return this_cpu_ptr(perf_trace_buf[rctx]);
+}
+EXPORT_SYMBOL_GPL(get_perf_trace_buf);
+NOKPROBE_SYMBOL(get_perf_trace_buf);
+
static void *__perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs **regs, int *rctxp,
bool update_rctx)
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f960b5f..44bfbeb 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -17,6 +17,8 @@ static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
+static int (*bpf_output_data)(void *src, int size) =
+ (void *) BPF_FUNC_output_data;
static unsigned long long (*bpf_ktime_get_ns)(void) =
(void *) BPF_FUNC_ktime_get_ns;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
--
1.8.5.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/