Re: [PATCH v2 03/16] perf annotate-arm64: Generalize arm64_mov__parse to support standard operands
From: Namhyung Kim
Date: Tue Apr 07 2026 - 02:59:37 EST
On Fri, Apr 03, 2026 at 09:47:47AM +0000, Tengda Wu wrote:
> The current arm64_mov__parse() implementation strictly requires the
> operand to contain a symbol suffix in the "<symbol>" format. This
> causes the parser to fail for standard instructions that only contain
> raw immediates or registers without symbolic annotations.
>
> Refactor the function to make symbol matching optional. The parser now
> correctly extracts the target operand and only attempts to parse the
> "<symbol>" suffix if it exists. This change also introduces better
> handling for whitespace and comments, and adds support for multi-register
> check via arm64__check_multi_regs(), ensuring compatibility with a
> wider range of arm64 instruction formats.
>
> Signed-off-by: Tengda Wu <wutengda@xxxxxxxxxxxxxxx>
> ---
> .../perf/util/annotate-arch/annotate-arm64.c | 85 ++++++++++++++-----
> 1 file changed, 65 insertions(+), 20 deletions(-)
>
> diff --git a/tools/perf/util/annotate-arch/annotate-arm64.c b/tools/perf/util/annotate-arch/annotate-arm64.c
> index 33080fdca125..4c42323b0c18 100644
> --- a/tools/perf/util/annotate-arch/annotate-arm64.c
> +++ b/tools/perf/util/annotate-arch/annotate-arm64.c
> @@ -14,12 +14,38 @@ struct arch_arm64 {
> regex_t jump_insn;
> };
>
> +static bool arm64__check_multi_regs(const char *op)
> +{
> + char *comma = strchr(op, ',');
> +
> + while (comma) {
> + char *next = comma + 1;
> +
> + next = skip_spaces(next);
> +
> + /*
> + * Check the first valid character after the comma:
> + * - If it is '#', it indicates an immediate offset (e.g., [x1, #16]).
> + * - If it is an alphabetic character, it is highly likely a
> + * register name (e.g., x, w, s, d, q, v, p, z).
> + * - Special cases: Alias and control registers like sp, xzr,
> + * and wzr all start with an alphabetic character.
> + */
> + if (*next && *next != '#' && isalpha(*next))
> + return true;
It seems you check any alphabet charactor after a comma for multi-regs.
Does that mean the first component before the comma should be another
register?
> +
> + comma = strchr(next, ',');
> + }
> +
> + return false;
> +}
> +
> static int arm64_mov__parse(const struct arch *arch __maybe_unused,
> struct ins_operands *ops,
> struct map_symbol *ms __maybe_unused,
> struct disasm_line *dl __maybe_unused)
> {
> - char *s = strchr(ops->raw, ','), *target, *endptr;
> + char *s = strchr(ops->raw, ','), *target, *endptr, *comment, prev;
>
> if (s == NULL)
> return -1;
> @@ -31,29 +57,48 @@ static int arm64_mov__parse(const struct arch *arch __maybe_unused,
> if (ops->source.raw == NULL)
> return -1;
>
> - target = ++s;
> + target = skip_spaces(++s);
> + comment = strchr(s, arch->objdump.comment_char);
> +
> + if (comment != NULL)
> + s = comment - 1;
> + else
> + s = strchr(s, '\0') - 1;
An interesting use of strchr(). Oh, I found the strchr(3) man page
also mentions that it's a supported use case. TIL.
> +
> + while (s > target && isspace(s[0]))
> + --s;
> + s++;
> + prev = *s;
> + *s = '\0';
> ops->target.raw = strdup(target);
> + *s = prev;
> +
> if (ops->target.raw == NULL)
> goto out_free_source;
>
> - ops->target.addr = strtoull(target, &endptr, 16);
> - if (endptr == target)
> - goto out_free_target;
> -
> - s = strchr(endptr, '<');
> - if (s == NULL)
> - goto out_free_target;
> - endptr = strchr(s + 1, '>');
> - if (endptr == NULL)
> - goto out_free_target;
> -
> - *endptr = '\0';
> - *s = ' ';
> - ops->target.name = strdup(s);
> - *s = '<';
> - *endptr = '>';
> - if (ops->target.name == NULL)
> - goto out_free_target;
> + ops->target.multi_regs = arm64__check_multi_regs(ops->target.raw);
> +
> + /* Parse address followed by symbol name, e.g. "addr <symbol>" */
> + if (strchr(target, '<') != NULL) {
> + ops->target.addr = strtoull(target, &endptr, 16);
> + if (endptr == target)
> + goto out_free_target;
Hmm.. shouldn't this part be executed regardless of presence of a symbol
name?
> +
> + s = strchr(endptr, '<');
> + if (s == NULL)
> + goto out_free_target;
It'd be safer to check `if (*skip_spaces(endptr) == '<')` rather than
strchr().
> + endptr = strchr(s + 1, '>');
> + if (endptr == NULL)
> + goto out_free_target;
I'm afraid C++ programs can have symbols with <> for templates.
Probably strrchr() would work.
Thanks,
Namhyung
> +
> + *endptr = '\0';
> + *s = ' ';
> + ops->target.name = strdup(s);
> + *s = '<';
> + *endptr = '>';
> + if (ops->target.name == NULL)
> + goto out_free_target;
> + }
>
> return 0;
>
> --
> 2.34.1
>