Re: [PATCH v4 4/6] perf tools: enable dwarf_callchain_users on arm64
From: Mark Rutland
Date: Wed Dec 15 2021 - 11:37:55 EST
On Wed, Dec 15, 2021 at 03:11:36PM +0000, German Gomez wrote:
> From: Alexandre Truong <alexandre.truong@xxxxxxx>
>
> On arm64, enable dwarf_callchain_users, which is needed to perform a
> DWARF unwind in order to get the caller of the leaf frame.
>
> Signed-off-by: Alexandre Truong <alexandre.truong@xxxxxxx>
> Signed-off-by: German Gomez <german.gomez@xxxxxxx>
> ---
> tools/perf/builtin-report.c | 4 ++--
> tools/perf/builtin-script.c | 4 ++--
> tools/perf/util/callchain.c | 9 ++++++++-
> tools/perf/util/callchain.h | 2 +-
> 4 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 8167ebfe776a..a31ad60ba66e 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
> }
> }
>
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
>
> if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
> ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> @@ -1124,7 +1124,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
> * on events sample_type.
> */
> sample_type = evlist__combined_sample_type(*pevlist);
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
> return 0;
> }
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index ab7d575f97f2..d308adfd1176 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2318,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
> * on events sample_type.
> */
> sample_type = evlist__combined_sample_type(evlist);
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
>
> /* Enable fields for callchain entries */
> if (symbol_conf.use_callchain &&
> @@ -3468,7 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
> struct perf_session *session = script->session;
> u64 sample_type = evlist__combined_sample_type(session->evlist);
>
> - callchain_param_setup(sample_type);
> + callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
>
> if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
> pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 8e2777133bd9..aaab9a674807 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
> map__zput(node->ms.map);
> }
>
> -void callchain_param_setup(u64 sample_type)
> +void callchain_param_setup(u64 sample_type, const char *arch)
> {
> if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
> if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> @@ -1612,6 +1612,13 @@ void callchain_param_setup(u64 sample_type)
> else
> callchain_param.record_mode = CALLCHAIN_FP;
> }
> +
> + /*
> + * It's possible to determine the caller of leaf frames with omitted
> + * frame pointers on aarch64 using libunwind, so enable it.
> + */
I reckon it's worth mentioning *why* we need to do this; how about:
/*
* It's necessary to use libunwind to reliably determine the caller of
* a leaf function on aarch64, as otherwise we cannot know whether to
* start from the LR or FP.
*
* Always starting from the LR can result in duplicate or entirely
* erroneous entries. Always skipping the LR and starting from the FP
* can result in missing entries.
*/
Other than that, this looks fine to me!
Thanks,
Mark.
> + if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
> + dwarf_callchain_users = true;
> }
>
> static bool chain_match(struct callchain_list *base_chain,
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 77fba053c677..d95615daed73 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
> u64 *branch_count, u64 *predicted_count,
> u64 *abort_count, u64 *cycles_count);
>
> -void callchain_param_setup(u64 sample_type);
> +void callchain_param_setup(u64 sample_type, const char *arch);
>
> bool callchain_cnode_matched(struct callchain_node *base_cnode,
> struct callchain_node *pair_cnode);
> --
> 2.25.1
>