[PATCH v7 perf, bpf-next 3/8] perf, bpf: introduce PERF_RECORD_BPF_EVENT

From: Song Liu
Date: Thu Jan 10 2019 - 19:19:49 EST


For better performance analysis of BPF programs, this patch introduces
PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program
load/unload information to user space.

Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs.
The following example shows kernel symbols for a BPF program with 7
sub programs:

ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F
ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F
ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F
ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F
ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F
ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F
ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F
ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi

When a BPF program is loaded, PERF_RECORD_KSYMBOL is generated for
each of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not
needed for simple profiling.

For annotation, user space needs to listen to PERF_RECORD_BPF_EVENT
and gather more information about these (sub) programs via sys_bpf.

Signed-off-by: Song Liu <songliubraving@xxxxxx>
---
include/linux/filter.h | 7 +++
include/linux/perf_event.h | 6 +++
include/uapi/linux/perf_event.h | 29 ++++++++++-
kernel/bpf/core.c | 2 +-
kernel/bpf/syscall.c | 2 +
kernel/events/core.c | 86 +++++++++++++++++++++++++++++++++
6 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 8c8544b375eb..bcb086ed01d4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -955,6 +955,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,

void bpf_prog_kallsyms_add(struct bpf_prog *fp);
void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);

#else /* CONFIG_BPF_JIT */

@@ -1010,6 +1011,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
}
+
+static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+{  /* !CONFIG_BPF_JIT variant: @prog is not inspected. */
+ sym[0] = '\0';  /* hand back an empty string as the name */
+}
+
#endif /* CONFIG_BPF_JIT */

void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 77b2560f2dc7..0b539a2e21af 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1119,6 +1119,9 @@ typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister,
perf_ksymbol_get_name_f get_name, void *data);
+extern void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags);

extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1346,6 +1349,9 @@ static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
bool unregister,
perf_ksymbol_get_name_f get_name,
void *data) { }
+static inline void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags) { }  /* empty stub, like the neighbouring perf_event_*() helpers */
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 68c4da0227c5..8bd78a34e396 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -373,7 +373,8 @@ struct perf_event_attr {
write_backward : 1, /* Write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- __reserved_1 : 34;
+ bpf_event : 1, /* include bpf events */
+ __reserved_1 : 33;

union {
__u32 wakeup_events; /* wakeup every n events */
@@ -981,6 +982,25 @@ enum perf_event_type {
*/
PERF_RECORD_KSYMBOL = 17,

+ /*
+ * Record bpf events:
+ * enum perf_bpf_event_type {
+ * PERF_BPF_EVENT_UNKNOWN = 0,
+ * PERF_BPF_EVENT_PROG_LOAD = 1,
+ * PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ * };
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u16 type;
+ * u16 flags;
+ * u32 id;
+ * u8 tag[BPF_TAG_SIZE];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_BPF_EVENT = 18,
+
PERF_RECORD_MAX, /* non-ABI */
};

@@ -992,6 +1012,13 @@ enum perf_record_ksymbol_type {

#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)

+enum perf_bpf_event_type {  /* value carried in the 'type' field of PERF_RECORD_BPF_EVENT */
+ PERF_BPF_EVENT_UNKNOWN = 0,  /* never emitted: perf_event_bpf_event() rejects it */
+ PERF_BPF_EVENT_PROG_LOAD = 1,  /* reported from bpf_prog_load() */
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,  /* reported from __bpf_prog_put() when the refcount hits zero */
+ PERF_BPF_EVENT_MAX, /* non-ABI */
+};
+
#define PERF_MAX_STACK_DEPTH 127
#define PERF_MAX_CONTEXTS_PER_STACK 8

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 38de580abcc2..5c6b0fe6fbf7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -494,7 +494,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
*symbol_end = addr + hdr->pages * PAGE_SIZE;
}

-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
const char *end = sym + KSYM_NAME_LEN;
const struct btf_type *type;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0607db304def..4af63c8c95eb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1211,6 +1211,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
bpf_prog_kallsyms_del_all(prog);
@@ -1554,6 +1555,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
}

bpf_prog_kallsyms_add(prog);
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
return err;

free_used_maps:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dbcc90c79893..8a8fa1b066ff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;

static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -4308,6 +4309,8 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (event->attr.ksymbol)
atomic_dec(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_dec(&nr_bpf_events);

if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7744,6 +7747,87 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
}

+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {  /* carrier handed to perf_event_bpf_output() through perf_iterate_sb() */
+ struct bpf_prog *prog;  /* program being reported; only event_id is copied to the buffer */
+ struct {  /* fixed part of the record; sample_id is written after it */
+ struct perf_event_header header;
+ u16 type;  /* enum perf_bpf_event_type */
+ u16 flags;
+ u32 id;  /* prog->aux->id */
+ u8 tag[BPF_TAG_SIZE];  /* copy of prog->tag */
+ } event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{  /* Filter used by perf_event_bpf_output(). */
+ return event->attr.bpf_event;  /* non-zero only when the event was opened with attr.bpf_event set */
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{  /* perf_iterate_sb() callback: writes the record in @data to @event if it opted in. */
+ struct perf_bpf_event *bpf_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_bpf_match(event))  /* event did not set attr.bpf_event */
+ return;
+
+ perf_event_header__init_id(&bpf_event->event_id.header,
+ &sample, event);  /* NOTE(review): header.size is never restored; if this call grows it, the growth carries over to later events sharing @data - confirm */
+ ret = perf_output_begin(&handle, event,
+ bpf_event->event_id.header.size);
+ if (ret)  /* non-zero return: nothing is written for this event */
+ return;
+
+ perf_output_put(&handle, bpf_event->event_id);  /* fixed part: header, type, flags, id, tag */
+ perf_event__output_id_sample(event, &handle, &sample);  /* followed by the sample_id part */
+
+ perf_output_end(&handle);
+}
+
+static int perf_event_bpf_get_name(char *name, int len, void *data)
+{  /* Same signature as perf_ksymbol_get_name_f; @data is the struct bpf_prog to name. */
+ struct bpf_prog *prog = data;
+
+ bpf_get_prog_name(prog, name);  /* @len is not consulted: bpf_get_prog_name() takes no length argument */
+ return 0;  /* NOTE(review): no caller within this patch; presumably meant as get_name for perf_event_ksymbol() - confirm */
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags)
+{  /* Build a PERF_RECORD_BPF_EVENT record for @prog and pass it to perf_iterate_sb(). */
+ struct perf_bpf_event bpf_event;
+
+ if (type <= PERF_BPF_EVENT_UNKNOWN ||
+ type >= PERF_BPF_EVENT_MAX)  /* only values strictly between UNKNOWN and MAX are accepted */
+ return;
+
+ if (!atomic_read(&nr_bpf_events))  /* counter is bumped in account_event() for attr.bpf_event; zero means no listener */
+ return;
+
+ bpf_event = (struct perf_bpf_event){
+ .prog = prog,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(bpf_event.event_id),  /* size of the fixed part only */
+ },
+ .type = type,
+ .flags = flags,
+ .id = prog->aux->id,
+ },
+ };
+
+ memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);  /* tag is an array, so it is filled in after the initializer */
+ perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);  /* perf_event_bpf_output() does the per-event write */
+}
+
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9996,6 +10080,8 @@ static void account_event(struct perf_event *event)
inc = true;
if (event->attr.ksymbol)
atomic_inc(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_inc(&nr_bpf_events);

if (inc) {
/*
--
2.17.1