[PATCH 1/1] perf/core: find auxiliary events in running pmus list

From: kan . liang
Date: Wed Feb 24 2016 - 16:22:57 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

The perf_event_aux function goes through the pmus list to find the proper
auxiliary events to output. The pmus list consists of all possible pmus in
the system, which may or may not be running at the moment, while the
auxiliary events must come from running pmus. Therefore searching
non-running pmus is unnecessary and expensive, especially when there are
many non-running pmus on the list.

For example, the brk test case in lkp triggers many mmap operations
while perf with cycles:pp is also running on the system. As a
result, perf_event_aux is invoked many times, and each call searches the
whole pmus list. If we enable uncore support (even when uncore events are
not actually used), dozens of uncore pmus are added to the pmus list,
which can significantly decrease brk_test's ops_per_sec. Based on our
test, the ops_per_sec without the uncore patch is 2647573, while the
ops_per_sec with the uncore patch is only 1768444, which is a 33.2%
reduction.

This patch introduces a running_pmus list which tracks only the pmus that
currently have events in the system. perf_event_aux now walks the
running_pmus list instead of the pmus list to find auxiliary events.

Reported-by: Huang, Ying <ying.huang@xxxxxxxxxxxxxxx>
Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
kernel/events/core.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 94c47e3..e33a0de 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -335,6 +335,14 @@ static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;

+/*
+ * One node of the running_pmus list: a PMU that currently has at least
+ * one accounted event. Nodes are created by account_running_pmu() and
+ * removed by unaccount_running_pmu() when nr_event drops to zero.
+ */
+struct running_pmu {
+	struct list_head entry;	/* link in running_pmus */
+	struct pmu *pmu;	/* the PMU being tracked */
+	int nr_event;		/* accounted events that use @pmu */
+};
+
+/* PMUs with accounted events; walked under RCU by perf_event_aux(). */
+static LIST_HEAD(running_pmus);
+/* Serializes every update of running_pmus and of the nr_event counts. */
+static DEFINE_MUTEX(running_pmus_lock);
+
/*
* perf event paranoia level:
* -1 - not paranoid at all
@@ -3511,6 +3519,23 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}

+/*
+ * Drop @event's reference on the running_pmus entry of its PMU and retire
+ * the entry once the last event of that PMU is gone.
+ *
+ * perf_event_aux() walks running_pmus under rcu_read_lock() only, so a
+ * reader may still be dereferencing an entry after list_del_rcu() has
+ * unlinked it. The entry must therefore not be freed before a grace period
+ * has elapsed. struct running_pmu carries no rcu_head, so kfree_rcu() is
+ * not available here; synchronize_rcu() is used instead, and it is called
+ * after running_pmus_lock is dropped so that other account/unaccount
+ * callers are not stalled for the duration of the grace period.
+ *
+ * May sleep (mutex and synchronize_rcu()).
+ */
+static void unaccount_running_pmu(struct perf_event *event)
+{
+	struct running_pmu *pmu, *stale = NULL;
+
+	mutex_lock(&running_pmus_lock);
+
+	list_for_each_entry(pmu, &running_pmus, entry) {
+		if (pmu->pmu != event->pmu)
+			continue;
+
+		if (!--pmu->nr_event) {
+			list_del_rcu(&pmu->entry);
+			stale = pmu;
+		}
+
+		/*
+		 * account_running_pmu() adds at most one entry per PMU,
+		 * so there is nothing left to look for after a match.
+		 */
+		break;
+	}
+
+	mutex_unlock(&running_pmus_lock);
+
+	if (stale) {
+		/* Wait for RCU readers that may still see the entry. */
+		synchronize_rcu();
+		kfree(stale);
+	}
+}
+
static void unaccount_event(struct perf_event *event)
{
bool dec = false;
@@ -3541,6 +3566,8 @@ static void unaccount_event(struct perf_event *event)
static_key_slow_dec_deferred(&perf_sched_events);

unaccount_event_cpu(event, event->cpu);
+
+ unaccount_running_pmu(event);
}

/*
@@ -5616,6 +5643,7 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
+ struct running_pmu *running_pmu;
struct pmu *pmu;
int ctxn;

@@ -5631,7 +5659,9 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
}

rcu_read_lock();
- list_for_each_entry_rcu(pmu, &pmus, entry) {
+
+ list_for_each_entry_rcu(running_pmu, &running_pmus, entry) {
+ pmu = running_pmu->pmu;
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
if (cpuctx->unique_pmu != pmu)
goto next;
@@ -7740,6 +7770,29 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}

+/*
+ * Record that @event's PMU has one more accounted event.
+ *
+ * If the PMU already has an entry on running_pmus its count is bumped;
+ * otherwise a new entry with a count of one is allocated and published
+ * on the list with list_add_rcu(). Everything runs under
+ * running_pmus_lock.
+ *
+ * If the allocation fails the function returns without tracking the
+ * event; the caller is not informed.
+ * NOTE(review): unaccount_running_pmu() will still decrement whatever
+ * entry exists for this PMU later on, so an untracked event can make the
+ * count reach zero early - confirm whether this needs an error path.
+ */
+static void account_running_pmu(struct perf_event *event)
+{
+	struct running_pmu *cur, *fresh;
+
+	mutex_lock(&running_pmus_lock);
+
+	list_for_each_entry(cur, &running_pmus, entry) {
+		if (cur->pmu != event->pmu)
+			continue;
+
+		cur->nr_event++;
+		goto unlock;
+	}
+
+	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
+	if (!fresh)
+		goto unlock;
+
+	/* Fill in the entry completely before readers can find it. */
+	fresh->nr_event = 1;
+	fresh->pmu = event->pmu;
+	list_add_rcu(&fresh->entry, &running_pmus);
+unlock:
+	mutex_unlock(&running_pmus_lock);
+}
+
static void account_event(struct perf_event *event)
{
bool inc = false;
@@ -7772,6 +7825,8 @@ static void account_event(struct perf_event *event)
static_key_slow_inc(&perf_sched_events.key);

account_event_cpu(event, event->cpu);
+
+ account_running_pmu(event);
}

/*
--
2.5.0