Re: [PATCH RFC V8 3/4] perf,tools: per-event callgraph support
From: Jiri Olsa
Date: Wed Aug 05 2015 - 03:28:56 EST
On Tue, Aug 04, 2015 at 04:30:21AM -0400, Kan Liang wrote:
> From: Kan Liang <kan.liang@xxxxxxxxx>
>
> When multiple events are sampled it may not be needed to collect
> callgraphs for all of them. The sample sites are usually nearby, and
> it's enough to collect the callgraphs on a reference event (such as
> precise cycles or precise instructions).
> This patchkit adds the ability to turn off callgraphs and time stamp
> per event. This in term can reduce sampling overhead and the size of the
> perf.data. Furthermore, it makes collecting back traces and timestamps
> possible when PEBS threshold > 1, which significantly reducing the
> sampling overhead especially for frequently occurring events
> (https://lkml.org/lkml/2015/5/10/196). For example, A slower event with
> a larger period collects back traces/timestamps. Other more events run
> fast with multi-pebs. The time stamps from the slower events can be used
> to order the faster events. Their backtraces can give the user enough
> hint to find the right spot.
>
> Here are some examples and test results.
>
> 1. Comparing the elapsed time and perf.data size from "kernbench -M -H".
>
> The test command for FULL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p}'
> --call-graph fp --time"
>
> The test command for PARTIAL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,call-graph=fp,time,period=100000/,
> cpu/instructions,call-graph=no,time=0,period=20000/p}'"
>
> The elapsed time for FULL is 24.3 Sec, while for PARTIAL is 16.9 Sec.
> The perf.data size for FULL is 22.1 Gb, while for PARTIAL is 12.4 Gb.
>
> 2. Comparing the perf.data size and callgraph results.
>
> The test command for FULL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,period=100000/pp,cpu/instructions,period=20000/p}'
> --call-graph fp -- ./tchain_edit"
>
> The test command for PARTIAL callgraph and time support.
> "perf record -e
> '{cpu/cpu-cycles,call-graph=fp,time,period=100000/pp,
> cpu/instructions,call-graph=no,time=0,period=20000/p}'
> -- ./tchain_edit"
>
> The perf.data size for FULL is 43.2 MB, while for PARTIAL is 21.1 MB.
> The callgraph is roughly the same.
>
> The callgraph from FULL
> # Samples: 87K of event
> 'cpu/cpu-cycles,call-graph=fp,time,period=100000/pp'
> # Event count (approx.): 8760000000
> #
> # Children Self Command Shared Object Symbol
> # ........ ........ ........... ..................
> ..........................................
> #
> 99.98% 0.00% tchain_edit libc-2.15.so [.]
> __libc_start_main
> |
> ---__libc_start_main
>
> 99.97% 0.00% tchain_edit tchain_edit [.] main
> |
> ---main
> __libc_start_main
>
> 99.97% 0.00% tchain_edit tchain_edit [.] f1
> |
> ---f1
> main
> __libc_start_main
>
> 99.85% 87.01% tchain_edit tchain_edit [.] f3
> |
> ---f3
> |
> |--99.74%-- f2
> | f1
> | main
> | __libc_start_main
> --0.26%-- [...]
> 99.71% 0.12% tchain_edit tchain_edit [.] f2
> |
> ---f2
> f1
> main
> __libc_start_main
>
> The callgraph from PARTIAL
> # Samples: 417K of event
> 'cpu/instructions,call-graph=no,time=0,period=20000/p'
> # Event count (approx.): 8346980000
> #
> # Children Self Command Shared Object Symbol
> # ........ ........ ........... ................
> ..........................................
> #
> 98.82% 0.00% tchain_edit libc-2.15.so [.]
> __libc_start_main
> |
> ---__libc_start_main
>
> 98.82% 0.00% tchain_edit tchain_edit [.] main
> |
> ---main
> __libc_start_main
>
> 98.82% 0.00% tchain_edit tchain_edit [.] f1
> |
> ---f1
> main
> __libc_start_main
>
> 98.82% 98.28% tchain_edit tchain_edit [.] f3
> |
> ---f3
> |
> |--0.53%-- f2
> | f1
> | main
> | __libc_start_main
> |
> |--0.01%-- f1
> | main
> | __libc_start_main
> --99.46%-- [...]
> 97.63% 0.03% tchain_edit tchain_edit [.] f2
> |
> ---f2
> f1
> main
> __libc_start_main
>
> 7.13% 0.03% tchain_edit [kernel.vmlinux] [k] do_nmi
> |
> ---do_nmi
> end_repeat_nmi
> f3
> f2
> f1
> main
> __libc_start_main
>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
> ---
> tools/perf/Documentation/perf-record.txt | 4 ++
> tools/perf/util/evsel.c | 65 +++++++++++++++++++++++++++++++-
> tools/perf/util/evsel.h | 4 ++
> tools/perf/util/parse-events.c | 12 ++++++
> tools/perf/util/parse-events.h | 2 +
> tools/perf/util/parse-events.l | 2 +
> tools/perf/util/pmu.c | 3 +-
> 7 files changed, 89 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 0d852d1..e633711 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -52,6 +52,10 @@ OPTIONS
> - 'time': Disable/enable time stamping. Acceptable values are 1 for
> enabling time stamping. 0 for disabling time stamping.
> The default is 1.
> + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for
> + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
> + "no" for disable callgraph.
> + - 'stack-size': user stack size for dwarf mode
> Note: If user explicitly sets options which conflict with the params,
> the value set by the params will be overridden.
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index f572f46..106cd20 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -588,11 +588,36 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
> }
> }
>
> -static void apply_config_terms(struct perf_evsel *evsel)
> +static void
> +perf_evsel__reset_callgraph(struct perf_evsel *evsel,
> + struct callchain_param *param)
> +{
> + struct perf_event_attr *attr = &evsel->attr;
> +
> + perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
> + if (param->record_mode == CALLCHAIN_LBR) {
> + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
> + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
> + PERF_SAMPLE_BRANCH_CALL_STACK);
> + }
> + if (param->record_mode == CALLCHAIN_DWARF) {
> + perf_evsel__reset_sample_bit(evsel, REGS_USER);
> + perf_evsel__reset_sample_bit(evsel, STACK_USER);
> + }
> +}
> +
> +static void apply_config_terms(struct perf_evsel *evsel,
> + struct record_opts *opts)
> {
> struct perf_evsel_config_term *term;
> struct list_head *config_terms = &evsel->config_terms;
> struct perf_event_attr *attr = &evsel->attr;
> + struct callchain_param param;
> + u32 dump_size = 0;
> + char *callgraph_buf = NULL;
> +
> + /* callgraph default */
> + param.record_mode = callchain_param.record_mode;
>
> list_for_each_entry(term, config_terms, list) {
> switch (term->type) {
> @@ -605,10 +630,46 @@ static void apply_config_terms(struct perf_evsel *evsel)
> else
> perf_evsel__reset_sample_bit(evsel, TIME);
> break;
> + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
> + callgraph_buf = term->val.callgraph;
> + break;
> + case PERF_EVSEL__CONFIG_TERM_STACK_USER:
> + dump_size = term->val.stack_user;
> + break;
> default:
> break;
> }
> }
> +
> + /* User explicitly set per-event callgraph, clear the old setting and reset. */
> + if ((callgraph_buf != NULL) || (dump_size > 0)) {
> +
> + /* parse callgraph parameters */
> + if (callgraph_buf != NULL) {
> + if (!strcmp(callgraph_buf, "no")) {
> + param.enabled = false;
> + param.record_mode = CALLCHAIN_NONE;
> + } else {
> + param.enabled = true;
> + if (parse_callchain_record_opt(callgraph_buf, ¶m)) {
> + pr_err("per-event callgraph setting for %s failed. "
> + "Apply callgraph global setting for it\n",
> + evsel->name);
> + return;
hum, calling parse_callchain_record_opt from evsel hurts the python code:
17: Try 'import perf' in python, checking link problems :
--- start ---
test child forked, pid 25751
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: python/perf.so: undefined symbol: parse_callchain_record_opt
test child finished with -1
---- end ----
Try 'import perf' in python, checking link problems: FAILED!
not sure if we can call it from some place else (I guess not),
then we'd need to either put the util/callchain.c under python
objects, or somehow refine needed parsing code..
jirka
> + }
> + }
> + }
> + if (dump_size > 0)
> + param.dump_size = dump_size;
> +
> + /* If global callgraph set, clear it */
> + if (callchain_param.enabled)
> + perf_evsel__reset_callgraph(evsel, &callchain_param);
> +
> + /* set perf-event callgraph */
> + if (param.enabled)
> + perf_evsel__config_callgraph(evsel, opts, ¶m);
> + }
> }
>
> /*
> @@ -807,7 +868,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
> * Apply event specific term settings,
> * it overloads any global configuration.
> */
> - apply_config_terms(evsel);
> + apply_config_terms(evsel, opts);
> }
>
> static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 6a12908..09a3022 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -40,6 +40,8 @@ struct cgroup_sel;
> enum {
> PERF_EVSEL__CONFIG_TERM_PERIOD,
> PERF_EVSEL__CONFIG_TERM_TIME,
> + PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
> + PERF_EVSEL__CONFIG_TERM_STACK_USER,
> PERF_EVSEL__CONFIG_TERM_MAX,
> };
>
> @@ -49,6 +51,8 @@ struct perf_evsel_config_term {
> union {
> u64 period;
> bool time;
> + char *callgraph;
> + u64 stack_user;
> } val;
> };
>
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index a6cb9af..716f1fc 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -611,6 +611,12 @@ do { \
> return -EINVAL;
> }
> break;
> + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> + CHECK_TYPE_VAL(STR);
> + break;
> + case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> + CHECK_TYPE_VAL(NUM);
> + break;
> case PARSE_EVENTS__TERM_TYPE_NAME:
> CHECK_TYPE_VAL(STR);
> break;
> @@ -662,6 +668,12 @@ do { \
> case PARSE_EVENTS__TERM_TYPE_TIME:
> ADD_CONFIG_TERM(TIME, time, term->val.num);
> break;
> + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
> + break;
> + case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> + ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
> + break;
> default:
> break;
> }
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index e6f9aacc..87dc9f6 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -64,6 +64,8 @@ enum {
> PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
> PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
> PARSE_EVENTS__TERM_TYPE_TIME,
> + PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
> + PARSE_EVENTS__TERM_TYPE_STACKSIZE,
> };
>
> struct parse_events_term {
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index f542750..1665497 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -184,6 +184,8 @@ name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
> period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
> branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
> time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
> +call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
> +stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
> , { return ','; }
> "/" { BEGIN(INITIAL); return '/'; }
> {name_minus} { return str(yyscanner, PE_NAME); }
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index b615cdf..948f896 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -607,7 +607,8 @@ static char *formats_error_string(struct list_head *formats)
> {
> struct perf_pmu_format *format;
> char *err, *str;
> - static const char *static_terms = "config,config1,config2,name,period,branch_type,time\n";
> + static const char *static_terms = "config,config1,config2,name,period,"
> + "branch_type,time,call-graph,stack-size\n";
> unsigned i = 0;
>
> if (!asprintf(&str, "valid terms:"))
> --
> 1.8.3.1
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/