[PATCH v2 1/2] perf annotate: Handle x86 instruction suffix generally

From: Namhyung Kim
Date: Wed May 24 2023 - 16:51:02 EST


In AT&T asm syntax, most of x86 instructions can have size suffix like
b, w, l or q. Instead of adding all these instructions in the table,
we can handle them in a general way.

For example, it can try to find an instruction as is. If not found,
assuming it has a suffix and it'd try again without the suffix if it's
one of the allowed suffixes. This way, we can reduce the instruction
table size for duplicated entries of the same instructions with a
different suffix.

If an instruction xyz and others like xyz<suffix> are completely
different ones, then they both need to be listed in the table so that
they can be found before the second attempt (without the suffix).

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/util/annotate.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b708bbc49c9e..7f05f2a2aa83 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -70,6 +70,7 @@ struct arch {
struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
bool sorted_instructions;
bool initialized;
+ const char *insn_suffix;
void *priv;
unsigned int model;
unsigned int family;
@@ -179,6 +180,7 @@ static struct arch architectures[] = {
.init = x86__annotate_init,
.instructions = x86__instructions,
.nr_instructions = ARRAY_SIZE(x86__instructions),
+ .insn_suffix = "bwlq",
.objdump = {
.comment_char = '#',
},
@@ -720,6 +722,26 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name)
}

ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ if (ins)
+ return ins->ops;
+
+ if (arch->insn_suffix) {
+ char tmp[32];
+ char suffix;
+ size_t len = strlen(name);
+
+ if (len == 0 || len >= sizeof(tmp))
+ return NULL;
+
+ suffix = name[len - 1];
+ if (strchr(arch->insn_suffix, suffix) == NULL)
+ return NULL;
+
+ strcpy(tmp, name);
+ tmp[len - 1] = '\0'; /* remove the suffix and check again */
+
+ ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ }
return ins ? ins->ops : NULL;
}

--
2.41.0.rc0.172.g3f132b7071-goog