Re: [PATCH v5 9/9] perf, tools, script: Support total cycles count

From: Arnaldo Carvalho de Melo
Date: Wed Sep 19 2018 - 14:31:09 EST


Em Tue, Sep 18, 2018 at 05:32:14AM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> For perf script brstackinsn, also print a running cycles count.
> This makes it easier to calculate cycle deltas for code sections
> measured with LBRs.
>
> % perf record -b -a sleep 1
> % perf script -F +brstackinsn
> ...
> _dl_sysdep_start+330:
> 00007eff9f20583a insn: 75 c4 # PRED 24 cycles [24]
> 00007eff9f205800 insn: 48 83 e8 03
> 00007eff9f205804 insn: 48 83 f8 1e
> 00007eff9f205808 insn: 77 26
> 00007eff9f20580a insn: 48 63 04 81
> 00007eff9f20580e insn: 48 01 c8
> 00007eff9f205811 insn: ff e0 # MISPRED 31 cycles [7] 0.71 IPC
> 00007eff9f2059c0 insn: 44 8b 62 08
> 00007eff9f2059c4 insn: e9 67 fe ff ff # PRED 55 cycles [24] 0.04 IPC
> 00007eff9f205830 insn: 48 83 c2 10
> 00007eff9f205834 insn: 48 8b 02
> 00007eff9f205837 insn: 48 85 c0
> 00007eff9f20583a insn: 75 c4 # PRED 68 cycles [13] 0.23 IPC
>
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> tools/perf/builtin-script.c | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 3b73c3226141..84895c861b84 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -913,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
>
> static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
> struct perf_insn *x, u8 *inbuf, int len,
> - int insn, FILE *fp)
> + int insn, FILE *fp, int *total_cycles)
> {
> int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
> dump_insn(x, ip, inbuf, len, NULL),
> @@ -922,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
> en->flags.in_tx ? " INTX" : "",
> en->flags.abort ? " ABORT" : "");
> if (en->flags.cycles) {
> - printed += fprintf(fp, " %d cycles", en->flags.cycles);
> + *total_cycles += en->flags.cycles;
> + printed += fprintf(fp, " %d cycles [%d]", *total_cycles, en->flags.cycles);
> if (insn)
> printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
> }
> @@ -979,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
> u8 buffer[MAXBB];
> unsigned off;
> struct symbol *lastsym = NULL;
> + int total_cycles = 0;
>
> if (!(br && br->nr))
> return 0;
> @@ -999,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
> printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
> x.cpumode, x.cpu, &lastsym, attr, fp);
> printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
> - &x, buffer, len, 0, fp);
> + &x, buffer, len, 0, fp, &total_cycles);
> }
>
> /* Print all blocks */
> @@ -1027,7 +1029,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
>
> printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
> if (ip == end) {
> - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
> + printed += ip__fprintf_jump(ip, &br->entries[i],
> + &x, buffer + off, len - off,
> + insn, fp, &total_cycles);

Please don't reflow these things like that; just align the next line
with the new thing. It parses quicker when reviewing.

- Arnaldo



> break;
> } else {
> printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
> --
> 2.17.1