[PATCH V2 02/13] perf/core: output overhead when sched out from context

From: kan . liang
Date: Fri Dec 02 2016 - 16:22:57 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

Outputting every overhead record as it happens is very costly. The accumulated
time is more meaningful, so the overhead information should be output
at the very end.

The overhead information is output when the task is scheduled out or the
event is about to be disabled.
The arch-specific overhead is output in the event's pmu del() callback when
the PERF_EF_LOG flag is set.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
include/linux/perf_event.h | 2 ++
kernel/events/core.c | 9 ++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5bc8156..ebd356e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -313,6 +313,7 @@ struct pmu {
#define PERF_EF_START 0x01 /* start the counter when adding */
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
+#define PERF_EF_LOG 0x08 /* log overhead information */

/*
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -741,6 +742,7 @@ struct perf_event_context {
int nr_stat;
int nr_freq;
int rotate_disable;
+ int log_overhead;
atomic_t refcount;
struct task_struct *task;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5312744..306bc92 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1787,6 +1787,7 @@ event_sched_out(struct perf_event *event,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
+ bool log_overhead = needs_log_overhead(event) & ctx->log_overhead;
u64 tstamp = perf_event_time(event);
u64 delta;

@@ -1812,7 +1813,7 @@ event_sched_out(struct perf_event *event,
perf_pmu_disable(event->pmu);

event->tstamp_stopped = tstamp;
- event->pmu->del(event, 0);
+ event->pmu->del(event, log_overhead ? PERF_EF_LOG : 0);
event->oncpu = -1;
event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
@@ -1914,6 +1915,9 @@ static void __perf_event_disable(struct perf_event *event,
if (event->state < PERF_EVENT_STATE_INACTIVE)
return;

+ /* log overhead when disable event */
+ ctx->log_overhead = true;
+
update_context_time(ctx);
update_cgrp_time_from_event(event);
update_group_times(event);
@@ -10177,6 +10181,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
if (!child_ctx)
return;

+ /* log overhead when exit task context */
+ child_ctx->log_overhead = true;
+
/*
* In order to reduce the amount of tricky in ctx tear-down, we hold
* ctx::mutex over the entire thing. This serializes against almost
--
2.5.5