Re: [PATCH 4/4] perf tools: determine if LR is the return address

From: James Clark
Date: Mon Feb 08 2021 - 13:13:52 EST




On 22/01/2021 18:18, Alexandre Truong wrote:

> +}
> +
> +static int add_entry(struct unwind_entry *entry, void *arg)
> +{
> + struct entries *entries = arg;
> +
> + entries->stack[entries->i++] = entry->ip;
> + return 0;
> +}
> +
> +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread)
> +{
> + u64 leaf_frame;
> + struct entries entries = {{0, 0}, 0};
> +
> + if (get_leaf_frame_caller_enabled(sample))
> + return 0;
> +
> + unwind__get_entries(add_entry, &entries, thread, sample, 2);
> + leaf_frame = callchain_param.order == ORDER_CALLER ?
> + entries.stack[0] : entries.stack[1];
> +
> + if (leaf_frame + 1 == sample->user_regs.regs[PERF_REG_ARM64_LR])
> + return sample->user_regs.regs[PERF_REG_ARM64_LR];

Hi Alex,

From your other reply about your investigation, it looks like the check against PERF_REG_ARM64_LR isn't
required, because libunwind won't return a value if it's not correct, whether it's equal to the LR or not.

Also, PERF_REG_ARM64_LR points to the instruction _after_ the call site, i.e. where to return to,
not where the call was made from. So just leaf_frame rather than leaf_frame + 1 would be more accurate.

I was also looking at unwind_entry in machine.c which is similar to your add_entry function and saw that it
does some extra bits like this:

if (symbol_conf.hide_unresolved && entry->ms.sym == NULL)
return 0;

if (append_inlines(cursor, &entry->ms, entry->ip) == 0)
return 0;

/*
* Convert entry->ip from a virtual address to an offset in
* its corresponding binary.
*/
if (entry->ms.map)
addr = map__map_ip(entry->ms.map, entry->ip);

I have a feeling you will also need to apply those steps to the values returned from libunwind to make it 100%
equivalent.

James

> + return 0;
> +}
> diff --git a/tools/perf/util/arm-frame-pointer-unwind-support.h b/tools/perf/util/arm-frame-pointer-unwind-support.h
> new file mode 100644
> index 000000000000..16dc03fa9abe
> --- /dev/null
> +++ b/tools/perf/util/arm-frame-pointer-unwind-support.h
> @@ -0,0 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
> +
> +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread);
> +
> +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 40082d70eec1..bc6147e46c89 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -34,6 +34,7 @@
> #include "bpf-event.h"
> #include <internal/lib.h> // page_size
> #include "cgroup.h"
> +#include "arm-frame-pointer-unwind-support.h"
>
> #include <linux/ctype.h>
> #include <symbol/kallsyms.h>
> @@ -2671,10 +2672,12 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
> return err;
> }
>
> -static u64 get_leaf_frame_caller(struct perf_sample *sample __maybe_unused,
> - struct thread *thread __maybe_unused)
> +static u64 get_leaf_frame_caller(struct perf_sample *sample, struct thread *thread)
> {
> - return 0;
> + if (strncmp(thread->maps->machine->env->arch, "aarch64", 7) == 0)
> + return get_leaf_frame_caller_aarch64(sample, thread);
> + else
> + return 0;
> }
>
> static int thread__resolve_callchain_sample(struct thread *thread,
>