Re: [PATCH 04/14] perf/x86: output side-band events overhead

From: Mark Rutland
Date: Thu Nov 24 2016 - 11:22:19 EST


On Wed, Nov 23, 2016 at 04:44:42AM -0500, kan.liang@xxxxxxxxx wrote:
> From: Kan Liang <kan.liang@xxxxxxxxx>
>
> Iterating all events which need to receive side-band events also brings
> some overhead.
> Save the overhead information in the task context or the CPU context,
> whichever context is available.

Do we really want to expose this concept to userspace?

What if the implementation changes?

Thanks,
Mark.

> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
> ---
> include/linux/perf_event.h | 2 ++
> include/uapi/linux/perf_event.h | 1 +
> kernel/events/core.c | 32 ++++++++++++++++++++++++++++----
> 3 files changed, 31 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f72b97a..ec3cb7f 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -764,6 +764,8 @@ struct perf_event_context {
> #endif
> void *task_ctx_data; /* pmu specific data */
> struct rcu_head rcu_head;
> +
> + struct perf_overhead_entry sb_overhead;
> };
>
> /*
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 9124c7c..5e7c522 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -994,6 +994,7 @@ struct perf_branch_entry {
> enum perf_record_overhead_type {
> PERF_NMI_OVERHEAD = 0,
> PERF_MUX_OVERHEAD,
> + PERF_SB_OVERHEAD,
>
> PERF_OVERHEAD_MAX,
> };
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 9934059..51e9df7 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1829,9 +1829,15 @@ event_sched_out(struct perf_event *event,
> if (event->attr.exclusive || !cpuctx->active_oncpu)
> cpuctx->exclusive = 0;
>
> - if (log_overhead && cpuctx->mux_overhead.nr) {
> - cpuctx->mux_overhead.cpu = smp_processor_id();
> - perf_log_overhead(event, PERF_MUX_OVERHEAD, &cpuctx->mux_overhead);
> + if (log_overhead) {
> + if (cpuctx->mux_overhead.nr) {
> + cpuctx->mux_overhead.cpu = smp_processor_id();
> + perf_log_overhead(event, PERF_MUX_OVERHEAD, &cpuctx->mux_overhead);
> + }
> + if (ctx->sb_overhead.nr) {
> + ctx->sb_overhead.cpu = smp_processor_id();
> + perf_log_overhead(event, PERF_SB_OVERHEAD, &ctx->sb_overhead);
> + }
> }
>
> perf_pmu_enable(event->pmu);
> @@ -6133,6 +6139,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
> }
> }
>
> +static void
> +perf_caculate_sb_overhead(struct perf_event_context *ctx,
> + u64 time)
> +{
> + ctx->sb_overhead.nr++;
> + ctx->sb_overhead.time += time;
> +}
> +
> /*
> * Iterate all events that need to receive side-band events.
> *
> @@ -6143,9 +6157,12 @@ static void
> perf_iterate_sb(perf_iterate_f output, void *data,
> struct perf_event_context *task_ctx)
> {
> + struct perf_event_context *overhead_ctx = task_ctx;
> struct perf_event_context *ctx;
> + u64 start_clock, end_clock;
> int ctxn;
>
> + start_clock = perf_clock();
> rcu_read_lock();
> preempt_disable();
>
> @@ -6163,12 +6180,19 @@ perf_iterate_sb(perf_iterate_f output, void *data,
>
> for_each_task_context_nr(ctxn) {
> ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
> - if (ctx)
> + if (ctx) {
> perf_iterate_ctx(ctx, output, data, false);
> + if (!overhead_ctx)
> + overhead_ctx = ctx;
> + }
> }
> done:
> preempt_enable();
> rcu_read_unlock();
> +
> + end_clock = perf_clock();
> + if (overhead_ctx)
> + perf_caculate_sb_overhead(overhead_ctx, end_clock - start_clock);
> }
>
> /*
> --
> 2.5.5
>