Re: [PATCH/RFC 1/4] perf annotate: Add --source-only option

From: Taeung Song
Date: Fri Jun 30 2017 - 11:44:44 EST


Thank you for detailed review.

On 06/30/2017 12:18 AM, Namhyung Kim wrote:
Resend with a proper mail address..

On Fri, Jun 30, 2017 at 12:14 AM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
Hi Taeung,

Thanks for doing this work!

On Wed, Jun 28, 2017 at 12:14 PM, Taeung Song <treeze.taeung@xxxxxxxxx> wrote:
The option can show the result of performance analysis
based on full source code per symbol(function).
This view can be a precheck before assembly code level
and be the additional useful view point.

For example, if target symbol is 'hex2u64' of util/util.c,
the output is like below.

$ perf annotate --source-only --stdio -s hex2u64
Percent | Source code of util.c for cycles:ppp (42 samples)
-----------------------------------------------------------------
0.00 : 354 * While we find nice hex chars, build a long_val.
0.00 : 355 * Return number of chars processed.
0.00 : 356 */
0.00 : 357 int hex2u64(const char *ptr, u64 *long_val)
2.38 : 358 {
2.38 : 359 const char *p = ptr;
0.00 : 360 *long_val = 0;
0.00 : 361
30.95 : 362 while (*p) {
23.81 : 363 const int hex_val = hex(*p);
0.00 : 364
14.29 : 365 if (hex_val < 0)
0.00 : 366 break;
0.00 : 367
26.19 : 368 *long_val = (*long_val << 4) | hex_val;
0.00 : 369 p++;
0.00 : 370 }
0.00 : 371
0.00 : 372 return p - ptr;
0.00 : 373 }

Suggested-by: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Milian Wolff <milian.wolff@xxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Wang Nan <wangnan0@xxxxxxxxxx>
Cc: Jin Yao <yao.jin@xxxxxxxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Kim Phillips <kim.phillips@xxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxx>
Signed-off-by: Taeung Song <treeze.taeung@xxxxxxxxx>
---
tools/perf/builtin-annotate.c | 2 +
tools/perf/ui/browsers/annotate.c | 5 -
tools/perf/util/annotate.c | 303 +++++++++++++++++++++++++++++++++++++-
tools/perf/util/annotate.h | 22 +++
tools/perf/util/symbol.c | 1 +
tools/perf/util/symbol.h | 1 +
6 files changed, 325 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 7a5dc7e..054c74f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -436,6 +436,8 @@ int cmd_annotate(int argc, const char **argv)
symbol__config_symfs),
OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
"Interleave source code with assembly code (default)"),
+ OPT_BOOLEAN(0, "source-only", &symbol_conf.annotate_src_only,
+ "Display source code for each symbol"),
OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 27f41f2..b075a32 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -14,11 +14,6 @@
#include <linux/kernel.h>
#include <sys/ttydefaults.h>

-struct disasm_line_samples {
- double percent;
- u64 nr;
-};
-
#define IPC_WIDTH 6
#define CYCLES_WIDTH 6

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index be1caab..4108520 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1379,6 +1379,277 @@ static const char *annotate__norm_arch(const char *arch_name)
return normalize_arch((char *)arch_name);
}

+static int parse_srcline(char *srcline, char **path, int *line_nr)
+{
+ char *sep;
+
+ if (srcline == NULL || !strcmp(srcline, SRCLINE_UNKNOWN))
+ return -1;
+
+ sep = strchr(srcline, ':');
+ if (sep) {
+ *sep = '\0';
+ if (path)
+ *path = srcline;
+ *line_nr = strtoul(++sep, NULL, 0);
+ } else
+ return -1;
+
+ return 0;
+}
+
+static void code_line__free(struct code_line *cl)
+{
+ zfree(&cl->line);
+ zfree(&cl->matched_dl);
+ zfree(&cl->samples_sum);
+ free(cl);
+}
+
+static void code_lines__free(struct list_head *code_lines)
+{
+ struct code_line *pos, *tmp;
+
+ if (list_empty(code_lines))
+ return;
+
+ list_for_each_entry_safe(pos, tmp, code_lines, node) {
+ list_del_init(&pos->node);
+ code_line__free(pos);
+ }
+}
+
+static int symbol__free_source_code(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_code *code = notes->src->code;
+
+ if (code == NULL)
+ return -1;
+
+ code_lines__free(&code->lines);
+ zfree(&code->path);
+ zfree(&notes->src->code);
+ return 0;
+}
+
+static void code_line__sum_samples(struct code_line *cl, struct disasm_line *dl,
+ struct annotation *notes, struct perf_evsel *evsel)
+{
+ int i;
+ u64 nr_samples;
+ struct sym_hist *h;
+ struct source_code *code = notes->src->code;
+
+ for (i = 0; i < code->nr_events; i++) {
+ double percent = 0.0;
+
+ h = annotation__histogram(notes, evsel->idx + i);
+ nr_samples = h->addr[dl->offset];
+ if (h->sum)
+ percent = 100.0 * nr_samples / h->sum;
+
+ cl->samples_sum[i].percent += percent;
+ cl->samples_sum[i].nr += nr_samples;
+ }
+}
+
+static void source_code__print(struct code_line *cl, int nr_events,
+ struct annotation *notes, struct perf_evsel *evsel)
+{
+ int i;
+ const char *color;
+ double percent, max_percent = 0.0;
+
+ for (i = 0; i < cl->nr_matched_dl; i++)
+ code_line__sum_samples(cl, cl->matched_dl[i], notes, evsel);
+
+ for (i = 0; i < nr_events; i++) {
+ percent = cl->samples_sum[i].percent;
+ color = get_percent_color(percent);
+ if (max_percent < percent)
+ max_percent = percent;
+
+ if (symbol_conf.show_total_period)
+ color_fprintf(stdout, color, " %7" PRIu64,
+ cl->samples_sum[i].nr);
+ else
+ color_fprintf(stdout, color, " %7.2f", percent);
+ }
+ color = get_percent_color(max_percent);
+ color_fprintf(stdout, color, " : %d %s\n",
+ cl->line_nr, cl->line);
+}
+
+static int code_line__match_with_dl(struct symbol *sym, struct map *map,
+ struct code_line *cl)
+{
+ int nr_dl = 0;
+ size_t allocated = sizeof(struct disasm_line **);

I don't like the name 'allocated'. It's a size of a pointer to
disasm_line - so should be single "*" not "**" IMHO. What about just
'sizeof_dl'?


I think you are right, will change it.


+ u64 start = map__rip_2objdump(map, sym->start);
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *pos, **matched_dl = zalloc(allocated);

You can simply initialize the 'matched_dl' to NULL ...

+
+ if (!matched_dl)
+ return -1;
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ int line_nr;
+ u64 offset;
+ char *srcline;
+
+ if (pos->offset == -1)
+ continue;
+
+ offset = pos->offset + start;
+ srcline = get_srcline(map->dso, offset, NULL, false, true);
+
+ if (parse_srcline(srcline, NULL, &line_nr) < 0)
+ continue;
+
+ if (line_nr == cl->line_nr) {
+ if (nr_dl > 0) {
+ struct disasm_line **tmp =
+ realloc(matched_dl, allocated * (nr_dl + 1));

... since realloc() can handle NULL. So the if statement is not needed.


I got it.

Also I think the srcline might be from a different source file. You'd
be better checking the filename as well.

Yep, I found this problem on the case Milian told me..
will fix it.


+
+ if (!tmp) {
+ free(matched_dl);
+ return -1;
+ }
+ matched_dl = tmp;
+ }
+ matched_dl[nr_dl++] = pos;
+ }
+
+ if (nr_dl && (line_nr != cl->line_nr))
+ break;

Hmm.. you seem to assume that the disasm lines having a same srcline
is contiguous. But it would not be true for an optimized code..

Ah, my fault, I'll also modify it.



+ }
+
+ cl->matched_dl = matched_dl;
+ cl->nr_matched_dl = nr_dl;
+ return 0;
+}
+
+static struct code_line *code_line__new(char *line, int linenr, int nr_events)
+{
+ struct code_line *cl = zalloc(sizeof(*cl));
+
+ if (!cl)
+ return NULL;
+
+ cl->line_nr = linenr;
+ cl->line = strdup(line);

This also can fail.


Okey, will add if statement for the fail case.

+ cl->samples_sum =
+ zalloc(sizeof(struct disasm_line_samples) * nr_events);
+ if (!cl->samples_sum)
+ zfree(&cl);
+
+ return cl;
+}
+
+static int source_code__collect(struct symbol *sym, struct map *map,
+ struct source_code *code, char *path,
+ int start_linenr, int end_linenr)
+{
+ int ret = -1, linenr = 0;
+ char *line = NULL, *parsed_line;
+ size_t len;
+ FILE *file;
+ struct code_line *cl;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -1;
+
+ if (srcline_full_filename)
+ code->path = strdup(path);
+ else
+ code->path = strdup(basename(path));
+
+ INIT_LIST_HEAD(&code->lines);
+ while (!feof(file)) {
+ if (getline(&line, &len, file) < 0)
+ goto out_err;
+
+ if (++linenr < start_linenr)
+ continue;
+
+ parsed_line = rtrim(line);
+ cl = code_line__new(parsed_line, linenr, code->nr_events);
+ if (!cl)
+ goto out_err;
+
+ if (code_line__match_with_dl(sym, map, cl) < 0)
+ goto out_err;
+
+ list_add_tail(&cl->node, &code->lines);
+ if (linenr == end_linenr) {
+ ret = 0;
+ goto out;
+ }
+ }
+out_err:
+ code_lines__free(&code->lines);
+out:
+ free(line);
+ fclose(file);
+ return ret;
+}
+
+static int symbol__get_source_code(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel)
+{
+ int start_linenr, end_linenr, ret = -1;
+ char *path, *start_srcline = NULL, *end_srcline = NULL;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end - 1);
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_code *code;
+
+ if (map->dso->symsrc_filename) {
+ bool tmp = srcline_full_filename;
+
+ srcline_full_filename = true;
+ start_srcline = get_srcline(map->dso, start, NULL, false, true);
+ end_srcline = get_srcline(map->dso, end, NULL, false, true);

I'm not sure it's guaranteed that the last instruction is always at
the last line of the source code.

Thanks,
Namhyung



Understood the case you said.
Hum.. I thought a simple way but I'm not sure that it is good.

Is it nice to find a last line number
after collecting each srcline info by get_srcline()
with all disas lines for a symbol(function) ?

What do you think about the way getting the last line number
of the source code file ? Are there other workarounds ?


Thanks,
Taeung

+ srcline_full_filename = tmp;
+ } else
+ return -1;
+
+ if (parse_srcline(start_srcline, &path, &start_linenr) < 0)
+ goto out;
+ if (parse_srcline(end_srcline, &path, &end_linenr) < 0)
+ goto out;
+
+ code = zalloc(sizeof(struct source_code));
+ if (code == NULL)
+ goto out;
+
+ if (perf_evsel__is_group_event(evsel))
+ code->nr_events = evsel->nr_members;
+ else
+ code->nr_events = 1;
+
+ /* To roughly read a function header */
+ if (start_linenr > 4)
+ start_linenr -= 4;
+ else
+ start_linenr = 1;
+
+ if (source_code__collect(sym, map, code, path, start_linenr,
+ end_linenr) < 0) {
+ zfree(&code);
+ goto out;
+ }
+
+ ret = 0;
+ notes->src->code = code;
+out:
+ free_srcline(start_srcline);
+ free_srcline(end_srcline);
+ return ret;
+}
+
int symbol__disassemble(struct symbol *sym, struct map *map,
const char *arch_name, size_t privsize,
struct arch **parch)
@@ -1509,7 +1780,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map,

if (nline == 0)
pr_err("No output from %s\n", command);
-
/*
* kallsyms does not have symbol sizes so there may a nop at the end.
* Remove it.
@@ -1768,6 +2038,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
struct sym_hist *h = annotation__histogram(notes, evsel->idx);
struct disasm_line *pos, *queue = NULL;
u64 start = map__rip_2objdump(map, sym->start);
+ bool src_code_only = false;
int printed = 2, queue_len = 0;
int more = 0;
u64 len;
@@ -1788,8 +2059,14 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
if (perf_evsel__is_group_event(evsel))
width *= evsel->nr_members;

- graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
- width, width, "Percent", d_filename, evsel_name, h->sum);
+ if (symbol_conf.annotate_src_only && notes->src->has_src_code)
+ src_code_only = true;
+
+ graph_dotted_len = printf(" %-*.*s| %s of %s for %s (%" PRIu64 " samples)\n",
+ width, width, "Percent",
+ src_code_only ? "Source code" : "Source code & Disassembly",
+ src_code_only ? notes->src->code->path : d_filename,
+ evsel_name, h->sum);

printf("%-*.*s----\n",
graph_dotted_len, graph_dotted_len, graph_dotted_line);
@@ -1797,6 +2074,16 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
if (verbose > 0)
symbol__annotate_hits(sym, evsel);

+ if (src_code_only) {
+ struct source_code *code = notes->src->code;
+ struct code_line *cl;
+
+ list_for_each_entry(cl, &code->lines, node)
+ source_code__print(cl, code->nr_events, notes, evsel);
+
+ goto out;
+ }
+
list_for_each_entry(pos, &notes->src->source, node) {
if (context && queue == NULL) {
queue = pos;
@@ -1833,7 +2120,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
break;
}
}
-
+out:
+ printf("\n");
free(filename);

return more;
@@ -1903,6 +2191,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
bool full_paths, int min_pcnt, int max_lines)
{
struct dso *dso = map->dso;
+ struct annotation *notes = symbol__annotation(sym);
struct rb_root source_line = RB_ROOT;
u64 len;

@@ -1918,11 +2207,17 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
print_summary(&source_line, dso->long_name);
}

+ if (symbol_conf.annotate_src_only &&
+ symbol__get_source_code(sym, map, evsel) == 0)
+ notes->src->has_src_code = true;
+
symbol__annotate_printf(sym, map, evsel, full_paths,
min_pcnt, max_lines, 0);
if (print_lines)
symbol__free_source_line(sym, len);

+ if (notes->src->has_src_code)
+ symbol__free_source_code(sym);
disasm__purge(&symbol__annotation(sym)->src->source);

return 0;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 2105503..3513807 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -56,6 +56,11 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *

struct annotation;

+struct disasm_line_samples {
+ double percent;
+ u64 nr;
+};
+
struct disasm_line {
struct list_head node;
s64 offset;
@@ -95,6 +100,21 @@ struct cyc_hist {
u16 reset;
};

+struct code_line {
+ struct list_head node;
+ int line_nr;
+ char *line;
+ int nr_matched_dl;
+ struct disasm_line **matched_dl;
+ struct disasm_line_samples *samples_sum;
+};
+
+struct source_code {
+ char *path;
+ int nr_events;
+ struct list_head lines;
+};
+
struct source_line_samples {
double percent;
double percent_sum;
@@ -123,7 +143,9 @@ struct source_line {
*/
struct annotated_source {
struct list_head source;
+ struct source_code *code;
struct source_line *lines;
+ bool has_src_code;
int nr_histograms;
size_t sizeof_sym_hist;
struct cyc_hist *cycles_hist;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e7a98db..ca8dc6e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -38,6 +38,7 @@ struct symbol_conf symbol_conf = {
.use_modules = true,
.try_vmlinux_path = true,
.annotate_src = true,
+ .annotate_src_only = false,
.demangle = true,
.demangle_kernel = false,
.cumulate_callchain = true,
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 41ebba9..a7ba20b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -108,6 +108,7 @@ struct symbol_conf {
kptr_restrict,
annotate_asm_raw,
annotate_src,
+ annotate_src_only,
event_group,
demangle,
demangle_kernel,
--
2.7.4