Re: [PATCH v5 00/11] perf: Support multiple system call tables in the build
From: Namhyung Kim
Date: Sat Mar 15 2025 - 19:02:48 EST
On Fri, Mar 14, 2025 at 05:48:12PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Mar 14, 2025 at 02:26:41PM -0300, Arnaldo Carvalho de Melo wrote:
> > it finds the pair, but then its sc->args has a bogus pointer... I'll see
> > where this isn't being initialized...
>
> Breakpoint 4, trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x1046f10) at builtin-trace.c:3874
> 3874 bool is_candidate = false;
> (gdb) n
> 3876 if (pair == NULL || pair == sc ||
> (gdb) p pair
> $7 = (struct syscall *) 0x1083c50
> (gdb) p pair->name
> $8 = 0x81478e "accept4"
> (gdb) n
> 3877 pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> (gdb) p i
> $9 = 1
> (gdb) n
> 3876 if (pair == NULL || pair == sc ||
> (gdb) n
> 3880 printf("sc=%p\n", sc); fflush(stdout);
> (gdb) n
> sc=0x1046f10
> 3881 printf("sc->name=%p\n", sc->name); fflush(stdout);
> (gdb) n
> sc->name=0x6c66202c786c3830
> 3882 printf("sc->nr_args=%d, sc->args=%p\n", sc->nr_args, sc->args); fflush(stdout);
> (gdb) p sc->nr_args
> $10 = 1935635045
> (gdb) p sc->args
> $11 = (struct tep_format_field *) 0x257830203a6e656c
> (gdb) p *sc
> $12 = {e_machine = 540697702, id = 807761968, tp_format = 0x657075202c786c38, nr_args = 1935635045, args_size = 1634427759, bpf_prog = {sys_enter = 0x257830203a726464,
> sys_exit = 0x7075202c786c3830}, is_exit = 101, is_open = 101, nonexistent = 114, use_btf = 95, args = 0x257830203a6e656c,
> name = 0x6c66202c786c3830 <error: Cannot access memory at address 0x6c66202c786c3830>, fmt = 0x257830203a736761, arg_fmt = 0x786c3830}
> (gdb)
>
> Ok, ran out of time, but if I simple avoid the second loop in:
>
> static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
>
>
> I.e. the one that starts with:
>
> /*
> * Now lets do a second pass looking for enabled syscalls without
> * an augmenter that have a signature that is a superset of another
> * syscall with an augmenter so that we can auto-reuse it.
>
> This:
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index e0434f7dc67cb988..3664bb512c70cabf 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -3989,6 +3989,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
> goto out;
> }
>
> + return 0;
> +
> /*
> * Now lets do a second pass looking for enabled syscalls without
> * an augmenter that have a signature that is a superset of another
> ⬢ [acme@toolbox perf-tools-next]$
>
>
> Then all works, we don't reuse any BPF program, but then that is an
> heuristic anyway, that is tried becuase landlock_add_rule has a pointer
> argument:
>
> root@number:~# perf trace -e landlock_add_rule perf test -w landlock
> 0.000 ( 0.003 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7fff6f2bb550, flags: 45) = -1 EINVAL (Invalid argument)
> 0.004 ( 0.001 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7fff6f2bb540, flags: 45) = -1 EINVAL (Invalid argument)
> root@number:~# perf test enum
> 105: perf trace enum augmentation tests : Ok
> root@number:~#
>
> So its some sort of syncronization on the various new tables, sorted by
> name, etc that then when iterating over the syscalls ends up using a sc
> that is not initialized.
Right, I've realized that calling trace__syscall_info() can invalidate
the existing sc since it calls trace__find_syscall() which reallocates
and resorts the syscall table. That's why it was ok when no filter was
used since it'd allocate the whole table in the first pass. Otherwise
it looks for a pair syscall while holding the original sc but calling
the function would invalidate the sc.
What about this (on top of my earlier fix)?
Thanks,
Namhyung
---8<---
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 49199d753b7cafbf..da0ddc713e6b35da 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2506,10 +2506,12 @@ static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, i
};
struct syscall *sc, *tmp;
- sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
- sizeof(struct syscall), syscall__cmp);
- if (sc)
- return sc;
+ if (trace->syscalls.table) {
+ sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
+ sizeof(struct syscall), syscall__cmp);
+ if (sc)
+ return sc;
+ }
tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
sizeof(struct syscall));
@@ -3855,6 +3857,10 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, i
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
+ int orig_id = sc->id;
+ const char *orig_name = sc->name;
+ int e_machine = sc->e_machine;
+ struct tep_format_field *args = sc->args;
struct tep_format_field *field, *candidate_field;
/*
* We're only interested in syscalls that have a pointer:
@@ -3866,18 +3872,19 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
return NULL;
+ /* calling trace__syscall_info() may invalidate 'sc' */
try_to_find_pair:
- for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) {
- int id = syscalltbl__id_at_idx(sc->e_machine, i);
- struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id);
+ for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
+ int id = syscalltbl__id_at_idx(e_machine, i);
+ struct syscall *pair = trace__syscall_info(trace, NULL, e_machine, id);
struct bpf_program *pair_prog;
bool is_candidate = false;
- if (pair == NULL || pair == sc ||
+ if (pair == NULL || pair->id == orig_id ||
pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
continue;
- for (field = sc->args, candidate_field = pair->args;
+ for (field = args, candidate_field = pair->args;
field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
@@ -3944,7 +3951,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
goto next_candidate;
}
- pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+ pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, orig_name);
return pair_prog;
next_candidate:
continue;
@@ -4041,6 +4048,11 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
if (pair_prog == NULL)
continue;
+ /*
+ * Get syscall info again as find usable entry above might
+ * modify the syscall table and shuffle it.
+ */
+ sc = trace__syscall_info(trace, NULL, e_machine, key);
sc->bpf_prog.sys_enter = pair_prog;
/*