[tip:perf/core] x86/perf/intel/cstate: Make cstate hotplug handling actually work

From: tip-bot for Thomas Gleixner
Date: Thu Mar 31 2016 - 05:21:49 EST


Commit-ID: 49de0493e5f67a8023fa6fa5c89097c1f77de74e
Gitweb: http://git.kernel.org/tip/49de0493e5f67a8023fa6fa5c89097c1f77de74e
Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
AuthorDate: Sun, 20 Mar 2016 18:59:02 +0000
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Thu, 31 Mar 2016 10:30:36 +0200

x86/perf/intel/cstate: Make cstate hotplug handling actually work

The current implementation aside of being an incomprehensible mess is broken.

# cat /sys/bus/event_source/devices/cstate_core/cpumask
0-17

That's on a quad socket machine with 72 physical cores! Qualitee stuff.

So it's not a surprise that event migration in case of CPU hotplug does not
work either.

# perf stat -e cstate_core/c6-residency/ -C 1 sleep 60 &
# echo 0 >/sys/devices/system/cpu/cpu1/online

Tracing cstate_pmu_event_update gives me:

[001] cstate_pmu_event_update <-event_sched_out

After the fix it properly moves the event:

[001] cstate_pmu_event_update <-event_sched_out
[073] cstate_pmu_event_update <-__perf_event_read
[073] cstate_pmu_event_update <-event_sched_out

The migration of pkg events does not work either. Not that I'm surprised.

I really could not be bothered to decode that loop mess and simply replaced it
by querying the proper cpumasks which give us the answer in a comprehensible
way.

This also requires to direct the event to the current active reader CPU in
cstate_pmu_event_init() otherwise the hotplug logic can't work.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
[ Added event->cpu < 0 test to not explode]
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Kan Liang <kan.liang@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Cc: Vince Weaver <vincent.weaver@xxxxxxxxx>
Link: http://lkml.kernel.org/r/20160320185623.422519970@xxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/events/intel/cstate.c | 122 ++++++++++++++++++-----------------------
1 file changed, 53 insertions(+), 69 deletions(-)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c42..5c2f55f 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -385,7 +385,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int ret = 0;
+ int cpu;

if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -400,26 +400,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;

+ if (event->cpu < 0)
+ return -EINVAL;
+
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
if (!core_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
+ cpu = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
- } else
+ cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(event->cpu));
+ } else {
return -ENOENT;
+ }

- /* must be done before validate_group */
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
+ event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
-
- return ret;
+ return 0;
}

static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,102 +479,76 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
return 0;
}

+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
static void cstate_cpu_exit(int cpu)
{
- int i, id, target;
+ unsigned int target;

- /* cpu exit for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_core_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ if (has_cstate_core &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_core_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
}

- /* cpu exit for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- target = -1;
-
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ if (has_cstate_pkg &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
}
}

static void cstate_cpu_init(int cpu)
{
- int i, id;
+ unsigned int target;

- /* cpu init for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- for_each_cpu(i, &cstate_core_cpu_mask) {
- if (id == topology_core_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
- }
+ /*
+ * If this is the first online thread of that core, set it in
+ * the core cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(cpu));

- /* cpu init for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- for_each_cpu(i, &cstate_pkg_cpu_mask) {
- if (id == topology_physical_package_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
+ if (has_cstate_core && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+ /*
+ * If this is the first online thread of that package, set it
+ * in the package cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(cpu));
+ if (has_cstate_pkg && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
}

static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;

switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- break;
case CPU_STARTING:
cstate_cpu_init(cpu);
break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- break;
case CPU_DOWN_PREPARE:
cstate_cpu_exit(cpu);
break;
default:
break;
}
-
return NOTIFY_OK;
}