[PATCH 47/68] perf c2c report: Allow to report callchains

From: Arnaldo Carvalho de Melo
Date: Tue Oct 11 2016 - 13:36:25 EST


From: Jiri Olsa <jolsa@xxxxxxxxxx>

Add --call-graph option to properly setup callchain code. Adding default
settings to display callchains whenever they are stored in the
perf.data.

Committer Notes:

Testing it:

[root@jouet ~]# perf c2c record -a -g sleep 5
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.331 MB perf.data (4263 samples) ]
[root@jouet ~]# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
[root@jouet ~]# perf c2c report --stats
=================================================
Trace Event Information
=================================================
Total records : 4263
Locked Load/Store Operations : 220
Load Operations : 2130
Loads - uncacheable : 1
Loads - IO : 7
Loads - Miss : 86
Loads - no mapping : 5
Load Fill Buffer Hit : 609
Load L1D hit : 612
=================================================
Trace Event Information
=================================================
Total records : 4263
Locked Load/Store Operations : 220
Load Operations : 2130
Loads - uncacheable : 1
Loads - IO : 7
Loads - Miss : 86
Loads - no mapping : 5
Load Fill Buffer Hit : 609
Load L1D hit : 612
Load L2D hit : 27
Load LLC hit : 607
Load Local HITM : 15
Load Remote HITM : 0
Load Remote HIT : 0
Load Local DRAM : 176
Load Remote DRAM : 0
Load MESI State Exclusive : 176
Load MESI State Shared : 0
Load LLC Misses : 176
LLC Misses to Local DRAM : 100.0%
LLC Misses to Remote DRAM : 0.0%
LLC Misses to Remote cache (HIT) : 0.0%
LLC Misses to Remote cache (HITM) : 0.0%
Store Operations : 2133
Store - uncacheable : 0
Store - no mapping : 1
Store L1D Hit : 1967
Store L1D Miss : 165
No Page Map Rejects : 145
Unable to parse data source : 0

=================================================
Global Shared Cache Line Event Information
=================================================
Total Shared Cache Lines : 15
Load HITs on shared lines : 26
Fill Buffer Hits on shared lines : 7
L1D hits on shared lines : 3
L2D hits on shared lines : 0
LLC hits on shared lines : 16
Locked Access on shared lines : 2
Store HITs on shared lines : 8
Store L1D hits on shared lines : 7
Total Merged records : 23

=================================================
c2c details
=================================================
Events : cpu/mem-loads,ldlat=30/P
: cpu/mem-stores/P
[root@jouet ~]#

[root@jouet ~]# perf c2c report
Shared Data Cache Line Table (2378 entries)
Total --- LLC Load Hitm -- -- Store Reference - - Load Dram - LLC Total - Core Load Hit -
Cacheline records %hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2
- 0xffff880024380c00 10 0.00% 0 0 0 6 6 0 0 0 0 4 1 3 0
- 0.13% _raw_spin_lock_irqsave
- 0.07% ep_poll
sys_epoll_wait
do_syscall_64
return_from_SYSCALL_64
+ 0x103573
- 0.05% ep_poll_callback
__wake_up_common
- __wake_up_sync_key
- 0.02% pipe_read
__vfs_read
vfs_read
sys_read
do_syscall_64
return_from_SYSCALL_64
0xfdad
+ 0.02% sock_def_readable
+ 0.02% ep_scan_ready_list.constprop.12
+ 0.00% mutex_lock
+ 0.00% __wake_up_common
+ 0xffff880024380c40 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0
+ 0xffff880024380c80 1 0.00% 0 0 0 0 0 0 0 0 0 1 0 0 0
- 0xffff8800243e9f00 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0
enqueue_entity
enqueue_task_fair
activate_task
ttwu_do_activate
try_to_wake_up
wake_up_process
hrtimer_wakeup
__hrtimer_run_queues
hrtimer_interrupt
local_apic_timer_interrupt
smp_apic_timer_interrupt
apic_timer_interrupt
cpuidle_enter
call_cpuidle
help

-------------

And when presing 'd' to see the cacheline details:

Cacheline 0xffff880024380c00
----- HITM ----- -- Store Refs -- --------- cycles ----- cpu
Rmt Lcl L1 Hit L1 Miss Off Pid Tid rmt hitm lcl hitm load cnt Symbol
- 0.00% 0.00% 100.00% 0.00% 0x0 1473 1474:Chrome_ChildIOT 0 0 41 2 [k] _raw_spin_lock_irqsave [kernel]
- _raw_spin_lock_irqsave
- 51.52% ep_poll
sys_epoll_wait
do_syscall_64
return_from_SYSCALL_64
- 0x103573
47.19% 0
4.33% 0xc30bd
- 35.93% ep_poll_callback
__wake_up_common
- __wake_up_sync_key
- 18.20% pipe_read
__vfs_read
vfs_read
sys_read
do_syscall_64
return_from_SYSCALL_64
0xfdad
- 17.73% sock_def_readable
unix_stream_sendmsg
sock_sendmsg
___sys_sendmsg
__sys_sendmsg
sys_sendmsg
do_syscall_64
return_from_SYSCALL_64
__GI___libc_sendmsg
0x12c036af1fc0
0x16a4050
0x894928ec83485354
+ 12.45% ep_scan_ready_list.constprop.12
+ 0.00% 0.00% 0.00% 0.00% 0x8 1473 1474:Chrome_ChildIOT 0 0 102 1 [k] mutex_lock [kernel]
+ 0.00% 0.00% 0.00% 0.00% 0x38 1473 1473:chrome 0 0 88 1 [k] __wake_up_common [kernel]

help

Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Andi Kleen <andi@xxxxxxxxxxxxxx>
Cc: David Ahern <dsahern@xxxxxxxxx>
Cc: Don Zickus <dzickus@xxxxxxxxxx>
Cc: Joe Mario <jmario@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18avr@xxxxxxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/builtin-c2c.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c36d1dc668ac..d51a6c3515cb 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -17,6 +17,7 @@
#include "evsel.h"
#include <asm/bug.h>
#include "ui/browsers/hists.h"
+#include "evlist.h"

struct c2c_hists {
struct hists hists;
@@ -183,6 +184,11 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}

+ ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ evsel, &al, sysctl_perf_event_max_stack);
+ if (ret)
+ goto out;
+
mi = sample__resolve_mem(sample, &al);
if (mi == NULL)
return -ENOMEM;
@@ -2117,6 +2123,58 @@ static void ui_quirks(void)
}
}

+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
+
+const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+ CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_report_opt(arg);
+}
+
+static int setup_callchain(struct perf_evlist *evlist)
+{
+ u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ enum perf_call_graph_mode mode = CALLCHAIN_NONE;
+
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER))
+ mode = CALLCHAIN_DWARF;
+ else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ mode = CALLCHAIN_LBR;
+ else if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ mode = CALLCHAIN_FP;
+
+ if (!callchain_param.enabled &&
+ callchain_param.mode != CHAIN_NONE &&
+ mode != CALLCHAIN_NONE) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ callchain_param.record_mode = mode;
+ callchain_param.min_percent = 0;
+ return 0;
+}
+
static int perf_c2c__report(int argc, const char **argv)
{
struct perf_session *session;
@@ -2124,6 +2182,7 @@ static int perf_c2c__report(int argc, const char **argv)
struct perf_data_file file = {
.mode = PERF_DATA_MODE_READ,
};
+ char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
const struct option c2c_options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -2138,6 +2197,10 @@ static int perf_c2c__report(int argc, const char **argv)
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+ callchain_help, &parse_callchain_opt,
+ callchain_default_opt),
OPT_END()
};
int err = 0;
@@ -2179,6 +2242,10 @@ static int perf_c2c__report(int argc, const char **argv)
goto out;
}

+ err = setup_callchain(session->evlist);
+ if (err)
+ goto out_session;
+
if (symbol__init(&session->header.env) < 0)
goto out_session;

--
2.7.4