Re: [PATCH 1/1] perf trace: Support collecting 'union's with the BPF augmenter

From: Arnaldo Carvalho de Melo
Date: Tue Sep 10 2024 - 09:40:36 EST


On Tue, Sep 10, 2024 at 10:36:46AM -0300, Arnaldo Carvalho de Melo wrote:
> On Tue, Sep 10, 2024 at 10:27:34AM -0300, Arnaldo Carvalho de Melo wrote:
> > And reuse the BTF based struct pretty printer, with that we can offer
> > initial support for the 'bpf' syscall's second argument, a 'union
> > bpf_attr' pointer.
> >
> > But this is not that satisfactory as the libbpf btf dumper will pretty
> > print _all_ the union, we need to have a way to say that the first arg
> > selects the type for the union member to be pretty printed, something
> > like what pahole does translating the PERF_RECORD_ selector into a name,
> > and using that name to find a matching struct.

Description of the above technique in the pahole man page:

https://git.kernel.org/pub/scm/devel/pahole/pahole.git/tree/man-pages/pahole.1#n785

https://git.kernel.org/pub/scm/devel/pahole/pahole.git/tree/man-pages/pahole.1#n961

- Arnaldo

> Forgot to add Alan to the CC list, perhaps he has ideas about how to
> improve the union BTF dumper.
>
> How to somehow map the 'cmd' in the BPF syscall to the right union
> member.
>
> Maybe we should have a table with 'cmd' and name of union struct member,
> in the cases there is a name, and to the name of one of the members,
> when that is unambiguous. At start time, when the BPF syscall is in
> wanted, we may do all these lookups and build a table mapping things,
> then tell the libbpf BTF dumper which of tne entries in the struct,
> well, that member type id to use to pretty print, seems doable, wdyt?
>
> - Arnaldo
>
> > In the case of 'union bpf_attr' it would map PROG_LOAD to one of the
> > union members, but unfortunately there is no such mapping:
> >
> > root@number:~# pahole bpf_attr
> > union bpf_attr {
> > struct {
> > __u32 map_type; /* 0 4 */
> > __u32 key_size; /* 4 4 */
> > __u32 value_size; /* 8 4 */
> > __u32 max_entries; /* 12 4 */
> > __u32 map_flags; /* 16 4 */
> > __u32 inner_map_fd; /* 20 4 */
> > __u32 numa_node; /* 24 4 */
> > char map_name[16]; /* 28 16 */
> > __u32 map_ifindex; /* 44 4 */
> > __u32 btf_fd; /* 48 4 */
> > __u32 btf_key_type_id; /* 52 4 */
> > __u32 btf_value_type_id; /* 56 4 */
> > __u32 btf_vmlinux_value_type_id; /* 60 4 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > __u64 map_extra; /* 64 8 */
> > __s32 value_type_btf_obj_fd; /* 72 4 */
> > __s32 map_token_fd; /* 76 4 */
> > }; /* 0 80 */
> > struct {
> > __u32 map_fd; /* 0 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 key; /* 8 8 */
> > union {
> > __u64 value; /* 16 8 */
> > __u64 next_key; /* 16 8 */
> > }; /* 16 8 */
> > __u64 flags; /* 24 8 */
> > }; /* 0 32 */
> > struct {
> > __u64 in_batch; /* 0 8 */
> > __u64 out_batch; /* 8 8 */
> > __u64 keys; /* 16 8 */
> > __u64 values; /* 24 8 */
> > __u32 count; /* 32 4 */
> > __u32 map_fd; /* 36 4 */
> > __u64 elem_flags; /* 40 8 */
> > __u64 flags; /* 48 8 */
> > } batch; /* 0 56 */
> > struct {
> > __u32 prog_type; /* 0 4 */
> > __u32 insn_cnt; /* 4 4 */
> > __u64 insns; /* 8 8 */
> > __u64 license; /* 16 8 */
> > __u32 log_level; /* 24 4 */
> > __u32 log_size; /* 28 4 */
> > __u64 log_buf; /* 32 8 */
> > __u32 kern_version; /* 40 4 */
> > __u32 prog_flags; /* 44 4 */
> > char prog_name[16]; /* 48 16 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > __u32 prog_ifindex; /* 64 4 */
> > __u32 expected_attach_type; /* 68 4 */
> > __u32 prog_btf_fd; /* 72 4 */
> > __u32 func_info_rec_size; /* 76 4 */
> > __u64 func_info; /* 80 8 */
> > __u32 func_info_cnt; /* 88 4 */
> > __u32 line_info_rec_size; /* 92 4 */
> > __u64 line_info; /* 96 8 */
> > __u32 line_info_cnt; /* 104 4 */
> > __u32 attach_btf_id; /* 108 4 */
> > union {
> > __u32 attach_prog_fd; /* 112 4 */
> > __u32 attach_btf_obj_fd; /* 112 4 */
> > }; /* 112 4 */
> > __u32 core_relo_cnt; /* 116 4 */
> > __u64 fd_array; /* 120 8 */
> > /* --- cacheline 2 boundary (128 bytes) --- */
> > __u64 core_relos; /* 128 8 */
> > __u32 core_relo_rec_size; /* 136 4 */
> > __u32 log_true_size; /* 140 4 */
> > __s32 prog_token_fd; /* 144 4 */
> > }; /* 0 152 */
> > struct {
> > __u64 pathname; /* 0 8 */
> > __u32 bpf_fd; /* 8 4 */
> > __u32 file_flags; /* 12 4 */
> > __s32 path_fd; /* 16 4 */
> > }; /* 0 24 */
> > struct {
> > union {
> > __u32 target_fd; /* 0 4 */
> > __u32 target_ifindex; /* 0 4 */
> > }; /* 0 4 */
> > __u32 attach_bpf_fd; /* 4 4 */
> > __u32 attach_type; /* 8 4 */
> > __u32 attach_flags; /* 12 4 */
> > __u32 replace_bpf_fd; /* 16 4 */
> > union {
> > __u32 relative_fd; /* 20 4 */
> > __u32 relative_id; /* 20 4 */
> > }; /* 20 4 */
> > __u64 expected_revision; /* 24 8 */
> > }; /* 0 32 */
> > struct {
> > __u32 prog_fd; /* 0 4 */
> > __u32 retval; /* 4 4 */
> > __u32 data_size_in; /* 8 4 */
> > __u32 data_size_out; /* 12 4 */
> > __u64 data_in; /* 16 8 */
> > __u64 data_out; /* 24 8 */
> > __u32 repeat; /* 32 4 */
> > __u32 duration; /* 36 4 */
> > __u32 ctx_size_in; /* 40 4 */
> > __u32 ctx_size_out; /* 44 4 */
> > __u64 ctx_in; /* 48 8 */
> > __u64 ctx_out; /* 56 8 */
> > /* --- cacheline 1 boundary (64 bytes) --- */
> > __u32 flags; /* 64 4 */
> > __u32 cpu; /* 68 4 */
> > __u32 batch_size; /* 72 4 */
> > } test; /* 0 80 */
> > struct {
> > union {
> > __u32 start_id; /* 0 4 */
> > __u32 prog_id; /* 0 4 */
> > __u32 map_id; /* 0 4 */
> > __u32 btf_id; /* 0 4 */
> > __u32 link_id; /* 0 4 */
> > }; /* 0 4 */
> > __u32 next_id; /* 4 4 */
> > __u32 open_flags; /* 8 4 */
> > }; /* 0 12 */
> > struct {
> > __u32 bpf_fd; /* 0 4 */
> > __u32 info_len; /* 4 4 */
> > __u64 info; /* 8 8 */
> > } info; /* 0 16 */
> > struct {
> > union {
> > __u32 target_fd; /* 0 4 */
> > __u32 target_ifindex; /* 0 4 */
> > }; /* 0 4 */
> > __u32 attach_type; /* 4 4 */
> > __u32 query_flags; /* 8 4 */
> > __u32 attach_flags; /* 12 4 */
> > __u64 prog_ids; /* 16 8 */
> > union {
> > __u32 prog_cnt; /* 24 4 */
> > __u32 count; /* 24 4 */
> > }; /* 24 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 prog_attach_flags; /* 32 8 */
> > __u64 link_ids; /* 40 8 */
> > __u64 link_attach_flags; /* 48 8 */
> > __u64 revision; /* 56 8 */
> > } query; /* 0 64 */
> > struct {
> > __u64 name; /* 0 8 */
> > __u32 prog_fd; /* 8 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 cookie; /* 16 8 */
> > } raw_tracepoint; /* 0 24 */
> > struct {
> > __u64 btf; /* 0 8 */
> > __u64 btf_log_buf; /* 8 8 */
> > __u32 btf_size; /* 16 4 */
> > __u32 btf_log_size; /* 20 4 */
> > __u32 btf_log_level; /* 24 4 */
> > __u32 btf_log_true_size; /* 28 4 */
> > __u32 btf_flags; /* 32 4 */
> > __s32 btf_token_fd; /* 36 4 */
> > }; /* 0 40 */
> > struct {
> > __u32 pid; /* 0 4 */
> > __u32 fd; /* 4 4 */
> > __u32 flags; /* 8 4 */
> > __u32 buf_len; /* 12 4 */
> > __u64 buf; /* 16 8 */
> > __u32 prog_id; /* 24 4 */
> > __u32 fd_type; /* 28 4 */
> > __u64 probe_offset; /* 32 8 */
> > __u64 probe_addr; /* 40 8 */
> > } task_fd_query; /* 0 48 */
> > struct {
> > union {
> > __u32 prog_fd; /* 0 4 */
> > __u32 map_fd; /* 0 4 */
> > }; /* 0 4 */
> > union {
> > __u32 target_fd; /* 4 4 */
> > __u32 target_ifindex; /* 4 4 */
> > }; /* 4 4 */
> > __u32 attach_type; /* 8 4 */
> > __u32 flags; /* 12 4 */
> > union {
> > __u32 target_btf_id; /* 16 4 */
> > struct {
> > __u64 iter_info; /* 16 8 */
> > __u32 iter_info_len; /* 24 4 */
> > }; /* 16 16 */
> > struct {
> > __u64 bpf_cookie; /* 16 8 */
> > } perf_event; /* 16 8 */
> > struct {
> > __u32 flags; /* 16 4 */
> > __u32 cnt; /* 20 4 */
> > __u64 syms; /* 24 8 */
> > __u64 addrs; /* 32 8 */
> > __u64 cookies; /* 40 8 */
> > } kprobe_multi; /* 16 32 */
> > struct {
> > __u32 target_btf_id; /* 16 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 cookie; /* 24 8 */
> > } tracing; /* 16 16 */
> > struct {
> > __u32 pf; /* 16 4 */
> > __u32 hooknum; /* 20 4 */
> > __s32 priority; /* 24 4 */
> > __u32 flags; /* 28 4 */
> > } netfilter; /* 16 16 */
> > struct {
> > union {
> > __u32 relative_fd; /* 16 4 */
> > __u32 relative_id; /* 16 4 */
> > }; /* 16 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 expected_revision; /* 24 8 */
> > } tcx; /* 16 16 */
> > struct {
> > __u64 path; /* 16 8 */
> > __u64 offsets; /* 24 8 */
> > __u64 ref_ctr_offsets; /* 32 8 */
> > __u64 cookies; /* 40 8 */
> > __u32 cnt; /* 48 4 */
> > __u32 flags; /* 52 4 */
> > __u32 pid; /* 56 4 */
> > } uprobe_multi; /* 16 48 */
> > struct {
> > union {
> > __u32 relative_fd; /* 16 4 */
> > __u32 relative_id; /* 16 4 */
> > }; /* 16 4 */
> >
> > /* XXX 4 bytes hole, try to pack */
> >
> > __u64 expected_revision; /* 24 8 */
> > } netkit; /* 16 16 */
> > }; /* 16 48 */
> > } link_create; /* 0 64 */
> > struct {
> > __u32 link_fd; /* 0 4 */
> > union {
> > __u32 new_prog_fd; /* 4 4 */
> > __u32 new_map_fd; /* 4 4 */
> > }; /* 4 4 */
> > __u32 flags; /* 8 4 */
> > union {
> > __u32 old_prog_fd; /* 12 4 */
> > __u32 old_map_fd; /* 12 4 */
> > }; /* 12 4 */
> > } link_update; /* 0 16 */
> > struct {
> > __u32 link_fd; /* 0 4 */
> > } link_detach; /* 0 4 */
> > struct {
> > __u32 type; /* 0 4 */
> > } enable_stats; /* 0 4 */
> > struct {
> > __u32 link_fd; /* 0 4 */
> > __u32 flags; /* 4 4 */
> > } iter_create; /* 0 8 */
> > struct {
> > __u32 prog_fd; /* 0 4 */
> > __u32 map_fd; /* 4 4 */
> > __u32 flags; /* 8 4 */
> > } prog_bind_map; /* 0 12 */
> > struct {
> > __u32 flags; /* 0 4 */
> > __u32 bpffs_fd; /* 4 4 */
> > } token_create; /* 0 8 */
> > };
> >
> > root@number:~#
> >
> > So this is one case where BTF gets us only that far, not getting all
> > the way to automate the pretty printing of unions designed like 'union
> > bpf_attr', we will need a custom pretty printer for this union, as using
> > the libbpf union BTF dumper is way too verbose:
> >
> > root@number:~# perf trace --max-events 1 -e bpf bpftool map
> > 0.000 ( 0.054 ms): bpftool/3409073 bpf(cmd: PROG_LOAD, uattr: (union bpf_attr){(struct){.map_type = (__u32)1,.key_size = (__u32)2,.value_size = (__u32)2755142048,.max_entries = (__u32)32764,.map_flags = (__u32)150263906,.inner_map_fd = (__u32)21920,},(struct){.map_fd = (__u32)1,.key = (__u64)140723063628192,(union){.value = (__u64)94145833392226,.next_key = (__u64)94145833392226,},},.batch = (struct){.in_batch = (__u64)8589934593,.out_batch = (__u64)140723063628192,.keys = (__u64)94145833392226,},(struct){.prog_type = (__u32)1,.insn_cnt = (__u32)2,.insns = (__u64)140723063628192,.license = (__u64)94145833392226,},(struct){.pathname = (__u64)8589934593,.bpf_fd = (__u32)2755142048,.file_flags = (__u32)32764,.path_fd = (__s32)150263906,},(struct){(union){.target_fd = (__u32)1,.target_ifindex = (__u32)1,},.attach_bpf_fd = (__u32)2,.attach_type = (__u32)2755142048,.attach_flags = (__u32)32764,.replace_bpf_fd = (__u32)150263906,(union){.relative_fd = (__u32)21920,.relative_id = (__u32)21920,},},.test = (struct){.prog_fd = (__u32)1,.retval = (__u32)2,.data_size_in = (__u32)2755142048,.data_size_out = (__u32)32764,.data_in = (__u64)94145833392226,},(struct){(union){.start_id = (__u32)1,.prog_id = (__u32)1,.map_id = (__u32)1,.btf_id = (__u32)1,.link_id = (__u32)1,},.next_id = (__u32)2,.open_flags = (__u32)2755142048,},.info = (struct){.bpf_fd = (__u32)1,.info_len = (__u32)2,.info = (__u64)140723063628192,},.query = (struct){(union){.target_fd = (__u32)1,.target_ifindex = (__u32)1,},.attach_type = (__u32)2,.query_flags = (__u32)2755142048,.attach_flags = (__u32)32764,.prog_ids = (__u64)94145833392226,},.raw_tracepoint = (struct){.name = (__u64)8589934593,.prog_fd = (__u32)2755142048,.cookie = (__u64)94145833392226,},(struct){.btf = (__u64)8589934593,.btf_log_buf = (__u64)140723063628192,.btf_size = (__u32)150263906,.btf_log_size = (__u32)21920,},.task_fd_query = (struct){.pid = (__u32)1,.fd = (__u32)2,.flags = (__u32)2755142048,.buf_len = (__u32)32764,.buf = (__u64)94145833392226,},.link_create = (struct){(union){.prog_fd = (__u32)1,.map_fd = (__u32)1,},(u) = 3
> > root@number:~# 2: prog_array name hid_jmp_table flags 0x0
> > key 4B value 4B max_entries 1024 memlock 8440B
> > owner_prog_type tracing owner jited
> > 13: hash_of_maps name cgroup_hash flags 0x0
> > key 8B value 4B max_entries 2048 memlock 167584B
> > pids systemd(1)
> > 960: array name libbpf_global flags 0x0
> > key 4B value 32B max_entries 1 memlock 280B
> > 961: array name pid_iter.rodata flags 0x480
> > key 4B value 4B max_entries 1 memlock 8192B
> > btf_id 1846 frozen
> > pids bpftool(3409073)
> > 962: array name libbpf_det_bind flags 0x0
> > key 4B value 32B max_entries 1 memlock 280B
> >
> > root@number:~#
> >
> > For simpler unions this may be better than not seeing any payload, so
> > keep it there.
> >
> > Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> > Cc: Howard Chu <howardchu95@xxxxxxxxx>
> > Cc: Ian Rogers <irogers@xxxxxxxxxx>
> > Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> > Cc: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> > Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> > Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> > ---
> > tools/perf/builtin-trace.c | 17 ++++++++++-------
> > 1 file changed, 10 insertions(+), 7 deletions(-)
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index c47fde936c33a2e6..d28a56cc171b2b2e 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -1075,7 +1075,7 @@ static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg,
> >
> > if (btf_is_enum(arg_fmt->type))
> > return btf_enum_scnprintf(arg_fmt->type, trace->btf, bf, size, val);
> > - else if (btf_is_struct(arg_fmt->type))
> > + else if (btf_is_struct(arg_fmt->type) || btf_is_union(arg_fmt->type))
> > return btf_struct_scnprintf(arg_fmt->type, trace->btf, bf, size, arg);
> >
> > return 0;
> > @@ -2365,8 +2365,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
> > default_scnprintf = sc->arg_fmt[arg.idx].scnprintf;
> >
> > if (trace->force_btf ||
> > - (default_scnprintf == NULL ||
> > - (default_scnprintf == SCA_PTR && strstr(field->type, "struct")))) {
> > + (default_scnprintf == NULL || (default_scnprintf == SCA_PTR))) {
> > btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed,
> > size - printed, val, field->type);
> > if (btf_printed) {
> > @@ -3663,14 +3662,18 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigne
> > return -1;
> >
> > for (i = 0, field = sc->args; field; ++i, field = field->next) {
> > - struct_offset = strstr(field->type, "struct ");
> > -
> > // XXX We're only collecting pointer payloads _from_ user space
> > if (!sc->arg_fmt[i].from_user)
> > continue;
> >
> > - if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct */
> > - struct_offset += 7;
> > + struct_offset = strstr(field->type, "struct ");
> > + if (struct_offset == NULL)
> > + struct_offset = strstr(field->type, "union ");
> > + else
> > + struct_offset++; // "union" is shorter
> > +
> > + if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct or union (think BPF's attr arg) */
> > + struct_offset += 6;
> >
> > /* for 'struct foo *', we only want 'foo' */
> > for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) {
> > --
> > 2.46.0
> >