[PATCH 22/48] perf annotate: Add --type-stat option for debugging

From: Namhyung Kim
Date: Wed Oct 11 2023 - 23:54:00 EST


The --type-stat option is meant to be used together with --data-type; it
prints detailed failure reasons for the data type annotation.

$ perf annotate --data-type --type-stat
Annotate data type stats:
total 294, ok 116 (39.5%), bad 178 (60.5%)
-----------------------------------------------------------
30 : no_sym
40 : no_insn_ops
33 : no_mem_ops
63 : no_var
4 : no_typeinfo
8 : bad_offset

Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/builtin-annotate.c | 44 ++++++++++++++++++++++++++++++++-
tools/perf/util/annotate-data.c | 13 +++++++++-
tools/perf/util/annotate-data.h | 31 +++++++++++++++++++++++
tools/perf/util/annotate.c | 20 ++++++++++++---
4 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6be15a37d2b7..645acaba63f1 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,6 +58,7 @@ struct perf_annotate {
bool has_br_stack;
bool group_set;
bool data_type;
+ bool type_stat;
float min_percent;
const char *sym_hist_filter;
const char *cpu_list;
@@ -352,6 +353,43 @@ static void print_annotated_data_type(struct annotated_data_type *mem_type,
printf(";\n");
}

+static void print_annotate_data_stat(struct annotated_data_stat *s)
+{
+#define PRINT_STAT(fld) do { if (s->fld) printf("%10d : %s\n", s->fld, #fld); } while (0)
+ /* Print a summary line, then one row for each non-zero failure counter. */
+ int bad = s->no_sym +
+ s->no_insn +
+ s->no_insn_ops +
+ s->no_mem_ops +
+ s->no_reg +
+ s->no_dbginfo +
+ s->no_cuinfo +
+ s->no_var +
+ s->no_typeinfo +
+ s->invalid_size +
+ s->bad_offset;
+ int ok = s->total - bad;
+ /* Use 1 as the divisor when total is 0 so the percentages stay finite. */
+ printf("Annotate data type stats:\n");
+ printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n",
+ s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1));
+ printf("-----------------------------------------------------------\n");
+ PRINT_STAT(no_sym);
+ PRINT_STAT(no_insn);
+ PRINT_STAT(no_insn_ops);
+ PRINT_STAT(no_mem_ops);
+ PRINT_STAT(no_reg);
+ PRINT_STAT(no_dbginfo);
+ PRINT_STAT(no_cuinfo);
+ PRINT_STAT(no_var);
+ PRINT_STAT(no_typeinfo);
+ PRINT_STAT(invalid_size);
+ PRINT_STAT(bad_offset);
+ printf("\n");
+
+#undef PRINT_STAT
+}
+
static void hists__find_annotations(struct hists *hists,
struct evsel *evsel,
struct perf_annotate *ann)
@@ -359,6 +397,9 @@ static void hists__find_annotations(struct hists *hists,
struct rb_node *nd = rb_first_cached(&hists->entries), *next;
int key = K_RIGHT;

+ if (ann->type_stat)
+ print_annotate_data_stat(&ann_data_stat);
+
while (nd) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
struct annotation *notes;
@@ -657,7 +698,8 @@ int cmd_annotate(int argc, const char **argv)
itrace_parse_synth_opts),
OPT_BOOLEAN(0, "data-type", &annotate.data_type,
"Show data type annotate for the memory accesses"),
-
+ OPT_BOOLEAN(0, "type-stat", &annotate.type_stat,
+ "Show stats for the data type annotation"),
OPT_END()
};
int ret;
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index ba7d35648b05..3e30e6855ba8 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -28,6 +28,9 @@ struct annotated_data_type unknown_type = {
},
};

+/* Data type collection debug statistics */
+struct annotated_data_stat ann_data_stat;
+
/*
* Compare type name and size to maintain them in a tree.
* I'm not sure if DWARF would have information of a single type in many
@@ -206,6 +209,7 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
/* Get the type of the variable */
if (die_get_real_type(var_die, type_die) == NULL) {
pr_debug("variable has no type\n");
+ ann_data_stat.no_typeinfo++;
return -1;
}

@@ -216,18 +220,21 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
die_get_real_type(type_die, type_die) == NULL) {
pr_debug("no pointer or no type\n");
+ ann_data_stat.no_typeinfo++;
return -1;
}

/* Get the size of the actual type */
if (dwarf_aggregate_size(type_die, &size) < 0) {
pr_debug("type size is unknown\n");
+ ann_data_stat.invalid_size++;
return -1;
}

/* Minimal sanity check */
if ((unsigned)offset >= size) {
pr_debug("offset: %d is bigger than size: %lu\n", offset, size);
+ ann_data_stat.bad_offset++;
return -1;
}

@@ -246,6 +253,7 @@ static int find_data_type_die(struct debuginfo *di, u64 pc,
/* Get a compile_unit for this address */
if (!find_cu_die(di, pc, &cu_die)) {
pr_debug("cannot find CU for address %lx\n", pc);
+ ann_data_stat.no_cuinfo++;
return -1;
}

@@ -260,9 +268,12 @@ static int find_data_type_die(struct debuginfo *di, u64 pc,

/* Found a variable, see if it's correct */
ret = check_variable(&var_die, type_die, offset);
- break;
+ goto out;
}
+ if (ret < 0)
+ ann_data_stat.no_var++;

+out:
free(scopes);
return ret;
}
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index d2dc025b1934..8e73096c01d1 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -70,6 +70,37 @@ struct annotated_data_type {

extern struct annotated_data_type unknown_type;

+/**
+ * struct annotated_data_stat - Debug statistics for data type annotation
+ * @total: Total number of entries
+ * @no_sym: No symbol or map found
+ * @no_insn: Failed to get the arch or the disasm line
+ * @no_insn_ops: Failed to get the operand locations of the instruction
+ * @no_mem_ops: The instruction has no memory operands
+ * @no_reg: Failed to extract a register from the operand
+ * @no_dbginfo: The binary has no debug information
+ * @no_cuinfo: Failed to find a compile_unit
+ * @no_var: Failed to find a matching variable
+ * @no_typeinfo: Failed to get type info for the variable, or it is not a pointer
+ * @invalid_size: Failed to get the size of the type
+ * @bad_offset: The access offset is not smaller than the type size
+ */
+struct annotated_data_stat {
+ int total;
+ int no_sym;
+ int no_insn;
+ int no_insn_ops;
+ int no_mem_ops;
+ int no_reg;
+ int no_dbginfo;
+ int no_cuinfo;
+ int no_var;
+ int no_typeinfo;
+ int invalid_size;
+ int bad_offset;
+};
+extern struct annotated_data_stat ann_data_stat;
+
#ifdef HAVE_DWARF_SUPPORT

/* Returns data type at the location (ip, reg, offset) */
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3d9bb6b33e1a..72b867001e22 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -3649,11 +3649,17 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
u64 ip = he->ip;
int i;

- if (ms->map == NULL || ms->sym == NULL)
+ ann_data_stat.total++;
+
+ if (ms->map == NULL || ms->sym == NULL) {
+ ann_data_stat.no_sym++;
return NULL;
+ }

- if (evsel__get_arch(evsel, &arch) < 0)
+ if (evsel__get_arch(evsel, &arch) < 0) {
+ ann_data_stat.no_insn++;
return NULL;
+ }

/* Make sure it runs objdump to get disasm of the function */
symbol__ensure_annotate(ms, evsel);
@@ -3663,11 +3669,15 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
* This is too slow...
*/
dl = find_disasm_line(ms->sym, ip);
- if (dl == NULL)
+ if (dl == NULL) {
+ ann_data_stat.no_insn++;
return NULL;
+ }

- if (annotate_get_insn_location(arch, dl, &loc) < 0)
+ if (annotate_get_insn_location(arch, dl, &loc) < 0) {
+ ann_data_stat.no_insn_ops++;
return NULL;
+ }

for_each_insn_op_loc(&loc, i, op_loc) {
if (!op_loc->mem_ref)
@@ -3684,5 +3694,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
he->mem_type_off = op_loc->offset;
return mem_type;
}
+
+ ann_data_stat.no_mem_ops++;
return NULL;
}
--
2.42.0.655.g421f12c284-goog