[PATCH] perf/x86/intel: Add proper condition to run sched_task callbacks

From: Jiri Olsa
Date: Mon Jul 17 2017 - 11:02:03 EST


The x86 pmu currently uses the sched_task callback for 2 functions:
- PEBS drain
- save/restore LBR data

Both are triggered once the x86 pmu registers the callback via the
perf_sched_cb_inc call (within the pmu::add callback), regardless
of whether any PEBS or LBR event is actually configured on the cpu.
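
For context, both subsystems take a reference on the same callback
via perf_sched_cb_inc; once that count is non-zero, intel_pmu_sched_task
runs on every context switch and, before this patch, unconditionally
called into both. An abbreviated sketch of the LBR side (the PEBS side
does the equivalent in pebs_update_state, visible in the diff below;
trimmed, not the complete function):

  /* arch/x86/events/intel/lbr.c, abbreviated */
  void intel_pmu_lbr_add(struct perf_event *event)
  {
  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

  	if (!x86_pmu.lbr_nr)
  		return;
  	...
  	/* one sched_task callback reference per LBR event */
  	perf_sched_cb_inc(event->ctx->pmu);
  	cpuc->lbr_users++;
  	...
  }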

This can lead to extra cycles in some perf monitoring, like when
we monitor a PEBS event without LBR data. Note that the PEBS event
must be non-freq and carry no timestamp for the sched_task callback
to be enabled, hence the options below:

# perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000
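
The exact perf stat invocation isn't quoted below; judging by the
'(5 runs)' header it was presumably along these lines (an assumption,
not taken from the original output):

  # perf stat -r 5 -e cycles:k,msr:write_msr \
  	./perf record --no-timestamp -c 10000 -e cycles:p ./perf bench sched pipe -l 1000000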

The perf stat counts of cycles and msr:write_msr for the above command, before the change:
...
 Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
 ./perf bench sched pipe -l 1000000' (5 runs):

     18,519,557,441      cycles:k
         91,195,527      msr:write_msr

       29.334476406 seconds time elapsed

And after the change:
...
 Performance counter stats for './perf record --no-timestamp -c 10000 -e cycles:p \
 ./perf bench sched pipe -l 1000000' (5 runs):

     18,565,757,840      cycles:k
         27,103,160      msr:write_msr

       16.253026030 seconds time elapsed

There's no effect on cycles:k, because the sched_task callback runs
with events switched off. However, the msr:write_msr tracepoint count
and the almost 50% speedup in elapsed time show the improvement.

Monitoring an LBR event with the extra PEBS drain processing still
in the sched_task callback showed only a small speedup, because the
drain function does not do much extra work when there is no PEBS
data (sketched below).
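
For reference, an abbreviated sketch of that early out, trimmed from
the Nehalem-style drain path (intel_pmu_drain_pebs_nhm() in ds.c of
this era; not the complete function):

  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
  {
  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
  	struct debug_store *ds = cpuc->ds;
  	void *base, *top;

  	if (!x86_pmu.pebs_active)
  		return;

  	base = (void *)(unsigned long)ds->pebs_buffer_base;
  	top = (void *)(unsigned long)ds->pebs_index;

  	/* reset the DS write pointer */
  	ds->pebs_index = ds->pebs_buffer_base;

  	/* no PEBS records since the last drain -> cheap early out */
  	if (unlikely(base >= top))
  		return;
  	...
  }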

Fix this by making the intel_pmu_sched_task callback check whether
any relevant PEBS or LBR event is actually configured on the cpu
before calling into each subsystem.

Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
---
arch/x86/events/intel/core.c | 6 ++++--
arch/x86/events/intel/ds.c | 8 ++++----
arch/x86/events/perf_event.h | 2 ++
3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..1f66356d8122 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3265,9 +3265,11 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (intel_pmu_pebs_needs_sched_cb(cpuc))
 		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
+	if (cpuc->lbr_users)
 		intel_pmu_lbr_sched_task(ctx, sched_in);
 }

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..c42e68efd6ec 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -811,7 +811,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
  * the large interrupt threshold, such that we can provide PID and TID
  * to PEBS samples.
  */
-static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+inline bool intel_pmu_pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
@@ -841,7 +841,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 	 */
 	bool update = cpuc->n_pebs == 1;
 
-	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+	if (needed_cb != intel_pmu_pebs_needs_sched_cb(cpuc)) {
 		if (!needed_cb)
 			perf_sched_cb_inc(pmu);
 		else
@@ -858,7 +858,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	bool needed_cb = pebs_needs_sched_cb(cpuc);
+	bool needed_cb = intel_pmu_pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs++;
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
@@ -896,7 +896,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	bool needed_cb = pebs_needs_sched_cb(cpuc);
+	bool needed_cb = intel_pmu_pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs--;
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bed..3a1acc40bfee 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -909,6 +909,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
 
+bool intel_pmu_pebs_needs_sched_cb(struct cpu_hw_events *cpuc);
+
 void intel_ds_init(void);
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
--
2.9.4