[PATCH 2/2] perf annotate: Add fusion logic for AMD microarchs

From: Ravi Bangoria
Date: Mon Sep 06 2021 - 06:57:42 EST


AMD family 15h and above microarchs fuse a subset of cmp/test/ALU
instructions with branch instructions[1][2]. Add perf annotate
fused instruction support for these microarchs.

Before:
│ testb $0x80,0x51(%rax)
│ ┌──jne 5b3
0.78 │ │ mov %r13,%rdi
│ │→ callq mark_page_accessed
1.08 │5b3:└─→mov 0x8(%r13),%rax

After:
│ ┌──testb $0x80,0x51(%rax)
│ ├──jne 5b3
0.78 │ │ mov %r13,%rdi
│ │→ callq mark_page_accessed
1.08 │5b3:└─→mov 0x8(%r13),%rax

[1] https://bugzilla.kernel.org/attachment.cgi?id=298553
[2] https://bugzilla.kernel.org/attachment.cgi?id=298555

Reported-by: Kim Phillips <kim.phillips@xxxxxxx>
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
tools/perf/arch/x86/annotate/instructions.c | 37 ++++++++++++++++++++-
tools/perf/util/annotate.c | 1 +
2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 24ea12ec7e02..46d7124cc4e1 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -144,8 +144,31 @@ static struct ins x86__instructions[] = {
{ .name = "xorps", .ops = &mov_ops, },
};

-static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
const char *ins2)
+{
+ if (strstr(ins2, "jmp"))
+ return false;
+
+ /* Family >= 15h supports cmp/test + branch fusion */
+ if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
+ (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
+ return true;
+ }
+
+ /* Family >= 19h supports some ALU + branch fusion */
+ if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
+ strstarts(ins1, "sub") || strstarts(ins1, "and") ||
+ strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
+ strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
+ return true;
+ }
+
+ return false;
+}
+
+static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
{
if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
return false;
@@ -172,6 +195,15 @@ static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
return false;
}

+static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
+{
+ if (strstarts(arch->vendor, "AuthenticAMD"))
+ return amd__ins_is_fused(arch, ins1, ins2);
+
+ return intel__ins_is_fused(arch, ins1, ins2);
+}
+
static int x86__cpuid_parse(struct arch *arch, char *cpuid)
{
unsigned int family, model, stepping;
@@ -184,6 +216,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
if (ret == 3) {
arch->family = family;
arch->model = model;
+ arch->vendor = strndup(cpuid, 12);
+ if (!arch->vendor)
+ return -1;
return 0;
}

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0bae061b2d6d..88326bb990b5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -77,6 +77,7 @@ struct arch {
bool sorted_instructions;
bool initialized;
void *priv;
+ char *vendor;
unsigned int model;
unsigned int family;
int (*init)(struct arch *arch, char *cpuid);
--
2.27.0