Re: [PATCH 1/5] perf, tools: Add support for skipping itrace instructions
From: Arnaldo Carvalho de Melo
Date: Tue Mar 29 2016 - 12:27:33 EST
Em Mon, Mar 28, 2016 at 10:45:38AM -0700, Andi Kleen escreveu:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> When using perf script to look at Intel PT traces, it is often
> useful to ignore the initialization code at the beginning.
>
> On larger traces, which may have many millions of instructions
> in the initialization code, filtering those out in a pipeline can be
> very slow, with perf script spending a lot of CPU time calling
> printf and writing data that is then thrown away.
>
> This patch adds an extension to the --itrace argument
> that skips 'n' events (instructions, branches or transactions)
> at the beginning. This is much more efficient.
>
> Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Adrian, are you OK with this version now? Can I have your Acked-by?
- Arnaldo
> v2:
> Add support for BTS (Adrian Hunter)
> Document in itrace.txt
> Fix branch check
> Check transactions and instructions too
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> ---
> tools/perf/Documentation/intel-pt.txt | 7 +++++++
> tools/perf/Documentation/itrace.txt | 8 ++++++++
> tools/perf/util/auxtrace.c | 7 +++++++
> tools/perf/util/auxtrace.h | 2 ++
> tools/perf/util/intel-bts.c | 5 +++++
> tools/perf/util/intel-pt.c | 22 ++++++++++++++++++++--
> 6 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
> index be764f9..c6c8318 100644
> --- a/tools/perf/Documentation/intel-pt.txt
> +++ b/tools/perf/Documentation/intel-pt.txt
> @@ -672,6 +672,7 @@ The letters are:
> d create a debug log
> g synthesize a call chain (use with i or x)
> l synthesize last branch entries (use with i or x)
> + s skip initial number of events
>
> "Instructions" events look like they were recorded by "perf record -e
> instructions".
> @@ -730,6 +731,12 @@ from one sample to the next.
>
> To disable trace decoding entirely, use the option --no-itrace.
>
> +It is also possible to skip a number of the generated events (instructions, branches,
> +transactions) at the beginning of the trace. This is useful to ignore initialization code.
> +
> + --itrace=i0nss1000000
> +
> +skips the first million instructions.
>
> dump option
> -----------
> diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
> index 65453f4..e2a4c5e 100644
> --- a/tools/perf/Documentation/itrace.txt
> +++ b/tools/perf/Documentation/itrace.txt
> @@ -7,6 +7,7 @@
> d create a debug log
> g synthesize a call chain (use with i or x)
> l synthesize last branch entries (use with i or x)
> + s skip initial number of events
>
> The default is all events i.e. the same as --itrace=ibxe
>
> @@ -24,3 +25,10 @@
>
> Also the number of last branch entries (default 64, max. 1024) for
> instructions or transactions events can be specified.
> +
> + It is also possible to skip a number of the generated events (instructions, branches,
> + transactions) at the beginning of the trace. This is useful to ignore initialization code.
> +
> + --itrace=i0nss1000000
> +
> + skips the first million instructions.
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index ec164fe..c916901 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
> synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
> synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
> synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
> + synth_opts->initial_skip = 0;
> }
>
> /*
> @@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
> synth_opts->last_branch_sz = val;
> }
> break;
> + case 's':
> + synth_opts->initial_skip = strtoul(p, &endptr, 10);
> + if (p == endptr)
> + goto out_err;
> + p = endptr;
> + break;
> case ' ':
> case ',':
> break;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index 57ff31e..767989e 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -68,6 +68,7 @@ enum itrace_period_type {
> * @last_branch_sz: branch context size
> * @period: 'instructions' events period
> * @period_type: 'instructions' events period type
> + * @initial_skip: skip N events at the beginning.
> */
> struct itrace_synth_opts {
> bool set;
> @@ -86,6 +87,7 @@ struct itrace_synth_opts {
> unsigned int last_branch_sz;
> unsigned long long period;
> enum itrace_period_type period_type;
> + unsigned long initial_skip;
> };
>
> /**
> diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
> index 6bc3ecd..3ff2f72f 100644
> --- a/tools/perf/util/intel-bts.c
> +++ b/tools/perf/util/intel-bts.c
> @@ -66,6 +66,7 @@ struct intel_bts {
> u64 branches_id;
> size_t branches_event_size;
> bool synth_needs_swap;
> + unsigned long num_events;
> };
>
> struct intel_bts_queue {
> @@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
> union perf_event event;
> struct perf_sample sample = { .ip = 0, };
>
> + if (bts->synth_opts.initial_skip &&
> + bts->num_events++ <= bts->synth_opts.initial_skip)
> + return 0;
> +
> event.sample.header.type = PERF_RECORD_SAMPLE;
> event.sample.header.misc = PERF_RECORD_MISC_USER;
> event.sample.header.size = sizeof(struct perf_event_header);
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 05d8158..07f0020 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -100,6 +100,8 @@ struct intel_pt {
> u64 cyc_bit;
> u64 noretcomp_bit;
> unsigned max_non_turbo_ratio;
> +
> + unsigned long num_events;
> };
>
> enum switch_state {
> @@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
> if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
> return 0;
>
> + if (pt->synth_opts.initial_skip &&
> + pt->num_events++ < pt->synth_opts.initial_skip)
> + return 0;
> +
> event->sample.header.type = PERF_RECORD_SAMPLE;
> event->sample.header.misc = PERF_RECORD_MISC_USER;
> event->sample.header.size = sizeof(struct perf_event_header);
> @@ -1028,6 +1034,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
> union perf_event *event = ptq->event_buf;
> struct perf_sample sample = { .ip = 0, };
>
> + if (pt->synth_opts.initial_skip &&
> + pt->num_events++ < pt->synth_opts.initial_skip)
> + return 0;
> +
> event->sample.header.type = PERF_RECORD_SAMPLE;
> event->sample.header.misc = PERF_RECORD_MISC_USER;
> event->sample.header.size = sizeof(struct perf_event_header);
> @@ -1085,6 +1095,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
> union perf_event *event = ptq->event_buf;
> struct perf_sample sample = { .ip = 0, };
>
> + if (pt->synth_opts.initial_skip &&
> + pt->num_events++ < pt->synth_opts.initial_skip)
> + return 0;
> +
> event->sample.header.type = PERF_RECORD_SAMPLE;
> event->sample.header.misc = PERF_RECORD_MISC_USER;
> event->sample.header.size = sizeof(struct perf_event_header);
> @@ -1196,14 +1210,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
> ptq->have_sample = false;
>
> if (pt->sample_instructions &&
> - (state->type & INTEL_PT_INSTRUCTION)) {
> + (state->type & INTEL_PT_INSTRUCTION) &&
> + (!pt->synth_opts.initial_skip ||
> + pt->num_events++ >= pt->synth_opts.initial_skip)) {
> err = intel_pt_synth_instruction_sample(ptq);
> if (err)
> return err;
> }
>
> if (pt->sample_transactions &&
> - (state->type & INTEL_PT_TRANSACTION)) {
> + (state->type & INTEL_PT_TRANSACTION) &&
> + (!pt->synth_opts.initial_skip ||
> + pt->num_events++ >= pt->synth_opts.initial_skip)) {
> err = intel_pt_synth_transaction_sample(ptq);
> if (err)
> return err;
> --
> 2.5.5