Re: [tip:perf/core] perf trace: Allow overriding global --max-stack per event

From: Ravi Bangoria
Date: Mon Jan 29 2018 - 10:09:12 EST


Hi Arnaldo,

This commit seems to be causing a regression:

$ ./perf trace record -g ls


Perf compiled from acme/perf/core:

$ ./perf trace -i perf.data
0.200 ( 0.016 ms): ls/19722 brk(
0.367 ( 0.024 ms): ls/19722 access(filename: 0xa1438b70, mode: R
0.401 ( 0.019 ms): ls/19722 open(filename: 0xa1438978, flags: CLOEXEC

Missing calltraces ^^^^^^^^^


Distro perf:

$ perf trace -i perf.data
0.200 ( 0.016 ms): ls/19722 brk(
do_syscall_trace_leave ([kernel.kallsyms])
[0] ([unknown])
syscall_exit_work ([kernel.kallsyms])
brk (/usr/lib64/ld-2.17.so)
_dl_sysdep_start (/usr/lib64/ld-2.17.so)
_dl_start_final (/usr/lib64/ld-2.17.so)
_dl_start (/usr/lib64/ld-2.17.so)
_start (/usr/lib64/ld-2.17.so)
0.367 ( 0.024 ms): ls/19722 access(filename: 0xa1438b70, mode: R
do_syscall_trace_leave ([kernel.kallsyms])
[0] ([unknown])
syscall_exit_work ([kernel.kallsyms])
access (/usr/lib64/ld-2.17.so)
dl_main (/usr/lib64/ld-2.17.so)
_dl_sysdep_start (/usr/lib64/ld-2.17.so)
_dl_start_final (/usr/lib64/ld-2.17.so)
_dl_start (/usr/lib64/ld-2.17.so)
_start (/usr/lib64/ld-2.17.so)
0.401 ( 0.019 ms): ls/19722 open(filename: 0xa1438978, flags: CLOEXEC
do_syscall_trace_leave ([kernel.kallsyms])
[0] ([unknown])
syscall_exit_work ([kernel.kallsyms])
open64 (/usr/lib64/ld-2.17.so)


Would something like the patch below fix the issue?

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 531d43b..d0ace22 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1642,9 +1642,12 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
struct callchain_cursor *cursor)
{
struct addr_location al;
+ int max_stack = evsel->attr.sample_max_stack ?
+ evsel->attr.sample_max_stack:
+ trace->max_stack;

if (machine__resolve(trace->host, &al, sample) < 0 ||
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
+ thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
return -1;

return 0;


Thanks,
Ravi


On 01/17/2018 10:04 PM, tip-bot for Arnaldo Carvalho de Melo wrote:
> Commit-ID: bd3dda9ab0fbdb8a91a2e869d93a0c9692b8444f
> Gitweb: https://git.kernel.org/tip/bd3dda9ab0fbdb8a91a2e869d93a0c9692b8444f
> Author: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> AuthorDate: Mon, 15 Jan 2018 11:33:53 -0300
> Committer: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> CommitDate: Wed, 17 Jan 2018 10:23:33 -0300
>
> perf trace: Allow overriding global --max-stack per event
>
> The per-event max-stack setting wasn't overriding the global --max-stack
> setting:
>
> # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1
> PING ::1(::1) 56 data bytes
> 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms
>
> --- ::1 ping statistics ---
> 1 packets transmitted, 1 received, 0% packet loss, time 0ms
> rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms
> 0.000 probe_libc:inet_pton:(7feb7a998350))
> __inet_pton (inlined)
> gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
> __GI_getaddrinfo (inlined)
> [0xffffaa39b6108f3f] (/usr/bin/ping)
> #
>
> Fix it:
>
> # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1
> PING ::1(::1) 56 data bytes
> 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.073 ms
>
> --- ::1 ping statistics ---
> 1 packets transmitted, 1 received, 0% packet loss, time 0ms
> rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms
> 0.000 probe_libc:inet_pton:(7f1083221350))
> __inet_pton (inlined)
> gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
> #
>
> Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> Cc: David Ahern <dsahern@xxxxxxxxx>
> Cc: Hendrick Brueckner <brueckner@xxxxxxxxxxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Thomas Richter <tmricht@xxxxxxxxxxxxxxxxxx>
> Cc: Wang Nan <wangnan0@xxxxxxxxxx>
> Link: https://lkml.kernel.org/n/tip-ic3g837xg8ob3kcpkspxwz0g@xxxxxxxxxxxxxx
> Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> ---
> tools/perf/builtin-trace.c | 14 +++++++++++++-
> 1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index ee85c29..531d43b 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
> struct addr_location al;
>
> if (machine__resolve(trace->host, &al, sample) < 0 ||
> - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
> + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
> return -1;
>
> return 0;
> @@ -2423,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
> evlist->threads->nr > 1 ||
> perf_evlist__first(evlist)->attr.inherit;
> +
> + /*
> + * Now that we already used evsel->attr to ask the kernel to setup the
> + * events, lets reuse evsel->attr.sample_max_stack as the limit in
> + * trace__resolve_callchain(), allowing per-event max-stack settings
> + * to override an explicitly set --max-stack global setting.
> + */
> + evlist__for_each_entry(evlist, evsel) {
> + if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
> + evsel->attr.sample_max_stack == 0)
> + evsel->attr.sample_max_stack = trace->max_stack;
> + }
> again:
> before = trace->nr_events;
>
>