[PATCH 11/16] perf tools: Enable passing event to BPF object

From: Wang Nan
Date: Tue Nov 24 2015 - 08:37:26 EST


A new syntax is added to the parser so that users can pass predefined
perf events into BPF objects.

After this patch, BPF programs for perf are finally able to utilize
bpf_perf_event_read() introduced in commit 35578d7984003097af2b1e3
(bpf: Implement function bpf_perf_event_read() that get the selected
hardware PMU conuter).

Test result:

# cat ./test_bpf_map_2.c
/************************ BEGIN **************************/
#define SEC(NAME) __attribute__((section(NAME), used))
enum bpf_map_type {
BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,
};
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
static void *(*map_lookup_elem)(struct bpf_map_def *, void *) =
(void *)1;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)6;
static int (*bpf_get_smp_processor_id)(void) =
(void *)8;
static int (*bpf_perf_event_read)(struct bpf_map_def *, int) =
(void *)22;

struct bpf_map_def SEC("maps") pmu_map = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = __NR_CPUS__,
};
SEC("func_write=sys_write")
int func_write(void *ctx)
{
unsigned long long val;
char fmt[] = "sys_write: pmu=%llu\n";
val = bpf_perf_event_read(&pmu_map, bpf_get_smp_processor_id());
bpf_trace_printk(fmt, sizeof(fmt), val);
return 0;
}

SEC("func_write_return=sys_write%return")
int func_write_return(void *ctx)
{
unsigned long long val = 0;
char fmt[] = "sys_write_return: pmu=%llu\n";
val = bpf_perf_event_read(&pmu_map, bpf_get_smp_processor_id());
bpf_trace_printk(fmt, sizeof(fmt), val);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************* END ***************************/

Normal case 1:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-13865 [006] d... 2722740.933204: : sys_write: pmu=1121685
ls-13865 [006] dN.. 2722740.933242: : sys_write_return: pmu=1178149
ls-13865 [006] d... 2722740.933248: : sys_write: pmu=1194986
ls-13865 [006] dN.. 2722740.933270: : sys_write_return: pmu=1220862

Normal case 2:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/period=0x7fffffffffffffff,no-inherit/ \
-e './test_bpf_map_2.c/maps:pmu_map:event=evt/' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data ]
# ./perf report --stdio
Error:
The perf.data file has no samples!

(This is expected: we set the period of the cycles event to a very
large value because we want to use this event as a counter only and
do not need sampling.)

# cat /sys/kernel/debug/tracing/trace | grep ls
ls-14446 [006] d... 2722976.486458: : sys_write: pmu=1116233
ls-14446 [006] dN.. 2722976.486486: : sys_write_return: pmu=1162108
ls-14446 [006] d... 2722976.486491: : sys_write: pmu=1177122
ls-14446 [006] dN.. 2722976.486511: : sys_write_return: pmu=1202417

Normal case 3:
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -i -e cycles -e './test_bpf_map_2.c/maps:pmu_map:event=cycles/' ls /

(When no alias is explicitly set, the event name can be used to look up the event.)

[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-16480 [005] d... 2724143.955040: : sys_write: pmu=1150794
ls-16480 [005] dN.. 2724143.955077: : sys_write_return: pmu=1207161
ls-16480 [005] d... 2724143.955083: : sys_write: pmu=1219145
ls-16480 [005] dN.. 2724143.955104: : sys_write_return: pmu=1245433

Normal case 4 (one thread case):
# ls /proc/11808/task/
11808
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' -p 11808
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.019 MB perf.data (2 samples) ]

# cat /sys/kernel/debug/tracing/trace | grep 11808
sshd-11808 [000] d... 2740454.781150: : sys_write: pmu=18446744073709551594
sshd-11808 [000] d... 2740454.781168: : sys_write_return: pmu=18446744073709551594
sshd-11808 [003] d... 2740467.411799: : sys_write: pmu=131031
sshd-11808 [003] dN.. 2740467.411806: : sys_write_return: pmu=161549
sshd-11808 [003] d... 2740467.411834: : sys_write: pmu=210269

Normal case 5 (system wide):
# echo "" > /sys/kernel/debug/tracing/trace
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ]

# cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20
[SNIP]
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373
gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696
gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658
gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572

Error case 1:

# ./perf record -e './test_bpf_map_2.c' ls /
[SNIP]
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.014 MB perf.data ]
# cat /sys/kernel/debug/tracing/trace | grep ls
ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614
ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614
ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614

(18446744073709551614 is 0xfffffffffffffffe (-2))

Error case 2:
# ./perf record -e cycles -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' -a
event syntax error: '..ps:pmu_map:event=evt/'
\___ Event not found for map setting

Hint: Valid config terms:
maps:[<arraymap>]:value=[value]
maps:[<eventmap>]:event=[event]
[SNIP]

Error case 3:
# ls /proc/2342/task/
2342 2373 2374 2375 2376
# ./perf record -e evt=cycles/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' -p 2342
ERROR: Apply config to BPF failed: Cannot set event to BPF maps in multi-thread tracing

Error case 4:
# ./perf record -e cycles -e './test_bpf_map_2.c/maps:pmu_map:event=cycles/' ls /
ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use /no-inherit/ config term or use -i)

Error case 5:
# ./perf record -e evt=raw_syscalls:sys_enter/no-inherit/ -e './test_bpf_map_2.c/maps:pmu_map:event=evt/' ls
ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Signed-off-by: He Kuang <hekuang@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Zefan Li <lizefan@xxxxxxxxxx>
Cc: pi3orama@xxxxxxx
---
tools/perf/util/bpf-loader.c | 138 ++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/bpf-loader.h | 5 ++
tools/perf/util/parse-events.c | 4 +-
3 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index ac0110c..a6e4bde 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -742,6 +742,7 @@ int bpf__foreach_tev(struct bpf_object *obj,

enum bpf_map_op_type {
BPF_MAP_OP_SET_VALUE,
+ BPF_MAP_OP_SET_EVSEL,
};

enum bpf_map_key_type {
@@ -754,6 +755,7 @@ struct bpf_map_op {
enum bpf_map_key_type key_type;
union {
u64 value;
+ struct perf_evsel *evsel;
} v;
};

@@ -880,10 +882,73 @@ bpf__obj_config_map_value(struct bpf_map *map,
if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
return bpf__obj_config_map_array_value(map, term);

- pr_debug("ERROR: wrong value type\n");
+ pr_debug("ERROR: wrong value type for 'value'\n");
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
}

+static int
+bpf__obj_config_map_array_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ struct bpf_map_def def;
+ struct bpf_map_op *op;
+ const char *map_name;
+ int err;
+
+ map_name = bpf_map__get_name(map);
+ evsel = perf_evlist__find_evsel_by_alias(evlist, term->val.str);
+ if (!evsel) {
+ pr_debug("Event (for '%s') '%s' doesn't exist\n",
+ map_name, term->val.str);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+ }
+
+ err = bpf_map__get_def(map, &def);
+ if (err) {
+ pr_debug("Unable to get map definition from '%s'\n",
+ map_name);
+ return err;
+ }
+
+ /*
+ * No need to check key_size and value_size:
+ * kernel has already checked them.
+ */
+ if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+
+ op = bpf_map_op__alloc(map);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ op->v.evsel = evsel;
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ return 0;
+}
+
+static int
+bpf__obj_config_map_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ if (!term->err_val) {
+ pr_debug("Config value not set\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+ }
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ return bpf__obj_config_map_array_event(map, term, evlist);
+
+ pr_debug("ERROR: wrong value type for 'event'\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+}
+
+
struct bpf_obj_config_map_func {
const char *config_opt;
int (*config_func)(struct bpf_map *, struct parse_events_term *,
@@ -892,6 +957,7 @@ struct bpf_obj_config_map_func {

struct bpf_obj_config_map_func bpf_obj_config_map_funcs[] = {
{"value", bpf__obj_config_map_value},
+ {"event", bpf__obj_config_map_event},
};

static int
@@ -1036,6 +1102,7 @@ bpf_map_config_foreach_key(struct bpf_map *map,
list_for_each_entry(op, &priv->ops_list, list) {
switch (def.type) {
case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
return foreach_key_array_all(func, arg, name,
@@ -1090,6 +1157,60 @@ apply_config_value_for_key(int map_fd, void *pkey,
}

static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+ struct perf_evsel *evsel)
+{
+ struct xyarray *xy = evsel->fd;
+ struct perf_event_attr *attr;
+ unsigned int key, events;
+ bool check_pass = false;
+ int *evt_fd;
+ int err;
+
+ if (!xy) {
+ pr_debug("ERROR: evsel not ready for map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (xy->row_size / xy->entry_size != 1) {
+ pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+ name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+ }
+
+ attr = &evsel->attr;
+ if (attr->inherit) {
+ pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+ }
+
+ if (attr->type == PERF_TYPE_RAW)
+ check_pass = true;
+ if (attr->type == PERF_TYPE_HARDWARE)
+ check_pass = true;
+ if (attr->type == PERF_TYPE_SOFTWARE &&
+ attr->config == PERF_COUNT_SW_BPF_OUTPUT)
+ check_pass = true;
+ if (!check_pass) {
+ pr_debug("ERROR: Event type is wrong for map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+ }
+
+ events = xy->entries / (xy->row_size / xy->entry_size);
+ key = *((unsigned int *)pkey);
+ if (key >= events) {
+ pr_debug("ERROR: there is no event %d for map %s\n",
+ key, name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+ }
+ evt_fd = xyarray__entry(xy, key, 0);
+ err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+ if (err && errno)
+ err = -errno;
+ return err;
+}
+
+static int
apply_obj_config_map_for_key(const char *name, int map_fd,
struct bpf_map_def *pdef __maybe_unused,
struct bpf_map_op *op,
@@ -1103,6 +1224,10 @@ apply_obj_config_map_for_key(const char *name, int map_fd,
pdef->value_size,
op->v.value);
break;
+ case BPF_MAP_OP_SET_EVSEL:
+ err = apply_config_evsel_for_key(name, map_fd, pkey,
+ op->v.evsel);
+ break;
default:
pr_debug("ERROR: unknown value type for '%s'\n", name);
err = -BPF_LOADER_ERRNO__INTERNAL;
@@ -1168,6 +1293,11 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
+ [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
};

static int
@@ -1304,6 +1434,12 @@ int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+ "Cannot set event to BPF maps in multi-thread tracing");
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+ "%s (Hint: use /no-inherit/ config term or use -i)", emsg);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+ "Can only put raw, hardware and BPF output event into a BPF map");
bpf__strerror_end(buf, size);
return 0;
}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index db3c34c..c9ce792 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -33,6 +33,11 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */
BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */
BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */
__BPF_LOADER_ERRNO__END,
};

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4e51ab3..799bfd2 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -655,8 +655,10 @@ parse_events_config_bpf(struct parse_events_evlist *data,
&error_pos, err, errbuf,
sizeof(errbuf));
data->error->help = strdup(
-"Hint:\tValid config term:\n"
+"Hint:\tValid config terms:\n"
" \tmaps:[<arraymap>]:value=[value]\n"
+" \tmaps:[<eventmap>]:event=[event]\n"
+"\n"
" \t(add -v to see detail)");
data->error->str = strdup(errbuf);
if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
--
1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/