Re: [PATCH 2/2] perf annotate: Add fusion logic for AMD microarchs

From: Arnaldo Carvalho de Melo
Date: Thu Sep 09 2021 - 16:33:18 EST


Em Mon, Sep 06, 2021 at 04:26:40PM +0530, Ravi Bangoria escreveu:
> AMD family 15h and above microarchs fuse a subset of cmp/test/ALU
> instructions with branch instructions[1][2]. Add perf annotate
> fused instruction support for these microarchs.
>
> Before:
> │ testb $0x80,0x51(%rax)
> │ ┌──jne 5b3
> 0.78 │ │ mov %r13,%rdi
> │ │→ callq mark_page_accessed
> 1.08 │5b3:└─→mov 0x8(%r13),%rax
>
> After:
> │ ┌──testb $0x80,0x51(%rax)
> │ ├──jne 5b3
> 0.78 │ │ mov %r13,%rdi
> │ │→ callq mark_page_accessed
> 1.08 │5b3:└─→mov 0x8(%r13),%rax
>
> [1] https://bugzilla.kernel.org/attachment.cgi?id=298553
> [2] https://bugzilla.kernel.org/attachment.cgi?id=298555
>
> Reported-by: Kim Phillips <kim.phillips@xxxxxxx>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
> ---
> tools/perf/arch/x86/annotate/instructions.c | 37 ++++++++++++++++++++-
> tools/perf/util/annotate.c | 1 +
> 2 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
> index 24ea12ec7e02..46d7124cc4e1 100644
> --- a/tools/perf/arch/x86/annotate/instructions.c
> +++ b/tools/perf/arch/x86/annotate/instructions.c
> @@ -144,8 +144,31 @@ static struct ins x86__instructions[] = {
> { .name = "xorps", .ops = &mov_ops, },
> };
>
> -static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
> +static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
> const char *ins2)
> +{
> + if (strstr(ins2, "jmp"))
> + return false;
> +
> + /* Family >= 15h supports cmp/test + branch fusion */
> + if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
> + (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
> + return true;
> + }
> +
> + /* Family >= 19h supports some ALU + branch fusion */
> + if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
> + strstarts(ins1, "sub") || strstarts(ins1, "and") ||
> + strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
> + strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
> + const char *ins2)
> {
> if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
> return false;
> @@ -172,6 +195,15 @@ static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
> return false;
> }
>
> +static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
> + const char *ins2)
> +{
> + if (strstarts(arch->vendor, "AuthenticAMD"))
> + return amd__ins_is_fused(arch, ins1, ins2);
> +
> + return intel__ins_is_fused(arch, ins1, ins2);
> +}
> +

Can we make x86__ins_is_fused a function pointer and, instead of
storing arch->vendor, set it to either amd__ins_is_fused() or
intel__ins_is_fused()?

I.e. here:

> static int x86__cpuid_parse(struct arch *arch, char *cpuid)
> {
> unsigned int family, model, stepping;
> @@ -184,6 +216,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
> if (ret == 3) {
> arch->family = family;
> arch->model = model;
> + arch->vendor = strndup(cpuid, 12);

x86__ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
amd__ins_is_fused :
intel__ins_is_fused;


?

> + if (!arch->vendor)
> + return -1;
> return 0;
> }
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index 0bae061b2d6d..88326bb990b5 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -77,6 +77,7 @@ struct arch {
> bool sorted_instructions;
> bool initialized;
> void *priv;
> + char *vendor;
> unsigned int model;
> unsigned int family;
> int (*init)(struct arch *arch, char *cpuid);
> --
> 2.27.0

--

- Arnaldo