Re: Perf Oops on 3.14-rc2

From: Peter Zijlstra
Date: Wed Feb 19 2014 - 11:29:04 EST


On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> > While adding CPU on/offlining support during perf captures, I get an
> > Oops on both ARM and my x86_64 desktop. Below is a small program that
> > duplicates the issue.
>
> [...]
>
> FWIW I can reproduce this easily with -rc3 on my x86 laptop running
> hackbench in parallel with a tweaked version of your test (using
> _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
> both CPU2 and CPU3).
>
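
For anyone wanting to poke at this, the test is roughly of the following
shape -- a minimal sketch reconstructed from the description above (this
is *not* Drew's actual program); it assumes root and CONFIG_HOTPLUG_CPU=y:

	/* hotplug-oops.c -- hypothetical reconstruction */
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/types.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				       int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	static void set_online(long cpu, int online)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%ld/online", cpu);
		f = fopen(path, "w");
		if (f) {
			fprintf(f, "%d\n", online);
			fclose(f);
		}
	}

	int main(void)
	{
		struct perf_event_attr attr;
		long cpu, ncpus = sysconf(_SC_NPROCESSORS_ONLN);

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.size = sizeof(attr);
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		/* one per-CPU counter on every online CPU (pid == -1) */
		for (cpu = 0; cpu < ncpus; cpu++)
			sys_perf_event_open(&attr, -1, cpu, -1, 0);

		for (;;) {	/* bounce the highest CPU offline and back */
			set_online(ncpus - 1, 0);
			set_online(ncpus - 1, 1);
		}
	}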

OK; found it, or at least, I stopped being able to make my box explode.

Drew, can you try the below and confirm? It's still got all the debug goo
in it, but *sheesh*, that took me long enough.

The gist of it: cpu_function_call() fails silently when the target CPU is
offline, so the per-CPU install/remove/disable paths could end up doing
nothing at all, and the hotplug exit path could leave dangling events on
the context lists for a later traversal to trip over.

Stephane, there are two task_function_call()s in the CGROUP_PERF code that
don't check the return value; can you please audit those?
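
For reference, the broken vs. fixed pattern looks roughly like this (a
sketch only -- __perf_foo() is a hypothetical stand-in handler, not one of
the actual CGROUP_PERF call sites):

	/*
	 * Unchecked: if @task sits on a CPU that has gone offline, the
	 * cross call fails and the update is silently lost.
	 */
	task_function_call(task, __perf_foo, event);

	/*
	 * Checked: retry until the IPI lands, or, once the context is no
	 * longer active, do the update directly under ctx->lock -- the
	 * same shape as the retry loops in the patch below.
	 */
	for (;;) {
		if (!task_function_call(task, __perf_foo, event))
			return;		/* handler ran on the remote CPU */

		raw_spin_lock_irq(&ctx->lock);
		if (!ctx->is_active) {
			/* context is inactive; safe to update in place */
			list_del_event(event, ctx);
			raw_spin_unlock_irq(&ctx->lock);
			return;
		}
		raw_spin_unlock_irq(&ctx->lock);
	}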

---
kernel/events/core.c | 128 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 101 insertions(+), 27 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2067cbb378eb..7c378fc21bff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -78,7 +78,7 @@ static void remote_function(void *data)
* -ESRCH - when the process isn't running
* -EAGAIN - when the process moved away
*/
-static int
+static int __must_check
task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
{
struct remote_function_call data = {
@@ -103,7 +103,8 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
*
* returns: @func return value or -ENXIO when the cpu is offline
*/
-static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+static int __must_check
+cpu_function_call(int cpu, int (*func) (void *info), void *info)
{
struct remote_function_call data = {
.p = NULL,
@@ -1500,14 +1501,23 @@ static void perf_remove_from_context(struct perf_event *event)

if (!task) {
/*
- * Per cpu events are removed via an smp call and
- * the removal is always successful.
+ * Per cpu events are removed via an smp call.
*/
- cpu_function_call(event->cpu, __perf_remove_from_context, event);
+retry1:
+ if (!cpu_function_call(event->cpu, __perf_remove_from_context, event))
+ return;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ raw_spin_unlock_irq(&ctx->lock);
+ goto retry1;
+ }
+ list_del_event(event, ctx);
+ raw_spin_unlock_irq(&ctx->lock);
return;
}

-retry:
+retry2:
if (!task_function_call(task, __perf_remove_from_context, event))
return;

@@ -1518,7 +1528,7 @@ static void perf_remove_from_context(struct perf_event *event)
*/
if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
- goto retry;
+ goto retry2;
}

/*
@@ -1592,11 +1602,24 @@ void perf_event_disable(struct perf_event *event)
/*
* Disable the event on the cpu that it's on
*/
- cpu_function_call(event->cpu, __perf_event_disable, event);
+retry1:
+ if (!cpu_function_call(event->cpu, __perf_event_disable, event))
+ return;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ raw_spin_unlock_irq(&ctx->lock);
+ goto retry1;
+ }
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_group_times(event);
+ event->state = PERF_EVENT_STATE_OFF;
+ }
+ raw_spin_unlock_irq(&ctx->lock);
return;
}

-retry:
+retry2:
if (!task_function_call(task, __perf_event_disable, event))
return;

@@ -1611,7 +1634,7 @@ void perf_event_disable(struct perf_event *event)
* a concurrent perf_event_context_sched_out().
*/
task = ctx->task;
- goto retry;
+ goto retry2;
}

/*
@@ -1939,14 +1962,22 @@ perf_install_in_context(struct perf_event_context *ctx,

if (!task) {
/*
- * Per cpu events are installed via an smp call and
- * the install is always successful.
+ * Per cpu events are installed via an smp call.
*/
- cpu_function_call(cpu, __perf_install_in_context, event);
+retry1:
+ if (!cpu_function_call(cpu, __perf_install_in_context, event))
+ return;
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ raw_spin_unlock_irq(&ctx->lock);
+ goto retry1;
+ }
+ add_event_to_ctx(event, ctx);
+ raw_spin_unlock_irq(&ctx->lock);
return;
}

-retry:
+retry2:
if (!task_function_call(task, __perf_install_in_context, event))
return;

@@ -1957,7 +1988,7 @@ perf_install_in_context(struct perf_event_context *ctx,
*/
if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
- goto retry;
+ goto retry2;
}

/*
@@ -2086,7 +2117,15 @@ void perf_event_enable(struct perf_event *event)
/*
* Enable the event on the cpu that it's on
*/
- cpu_function_call(event->cpu, __perf_event_enable, event);
+retry1:
+ if (!cpu_function_call(event->cpu, __perf_event_enable, event))
+ return;
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+ raw_spin_unlock_irq(&ctx->lock);
+ goto retry1;
+ }
+ raw_spin_unlock_irq(&ctx->lock);
return;
}

@@ -2104,7 +2143,7 @@ void perf_event_enable(struct perf_event *event)
if (event->state == PERF_EVENT_STATE_ERROR)
event->state = PERF_EVENT_STATE_OFF;

-retry:
+retry2:
if (!ctx->is_active) {
__perf_event_mark_enabled(event);
goto out;
@@ -2127,7 +2166,7 @@ void perf_event_enable(struct perf_event *event)
* perf_event_context_sched_out()
*/
task = ctx->task;
- goto retry;
+ goto retry2;
}

out:
@@ -3243,6 +3282,7 @@ static void __free_event(struct perf_event *event)

call_rcu(&event->rcu_head, free_event_rcu);
}
+
static void free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
@@ -3271,6 +3311,8 @@ static void free_event(struct perf_event *event)
if (is_cgroup_event(event))
perf_detach_cgroup(event);

+ printk("destroying event (%p) : %d:%lx for cpu: %d\n",
+ event, event->attr.type, event->attr.config, event->cpu);

__free_event(event);
}
@@ -4831,6 +4873,28 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
struct perf_event *event;

list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (!event) {
+			struct list_head *pos = ctx->event_list.next;
+			printk("on cpu: %d, %p:(%p,%p)\n", smp_processor_id(),
+					&ctx->event_list,
+					ctx->event_list.prev,
+					ctx->event_list.next);
+
+			/* walk the (possibly corrupted) list by hand */
+			while (pos && pos != &ctx->event_list) {
+				struct perf_event *tmp = list_entry(pos,
+						struct perf_event, event_entry);
+				printk("%02d: event(%p): %d:%llx %d %d\n",
+						smp_processor_id(), tmp,
+						tmp->attr.type,
+						(unsigned long long)tmp->attr.config,
+						tmp->state, tmp->cpu);
+				printk("%02d: ->next (%p)\n",
+						smp_processor_id(), pos->next);
+				pos = pos->next;
+			}
+		}
+
if (event->state < PERF_EVENT_STATE_INACTIVE)
continue;
if (!event_filter_match(event))
@@ -6722,6 +6786,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

event->state = PERF_EVENT_STATE_INACTIVE;

+ printk("creating event (%p) : %d:%lx for cpu: %d\n",
+ event, attr->type, attr->config, cpu);
+
if (task) {
event->attach_state = PERF_ATTACH_TASK;

@@ -7869,29 +7936,35 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)

static void __perf_event_exit_context(void *__info)
{
- struct perf_event_context *ctx = __info;
- struct perf_event *event, *tmp;
+ struct perf_cpu_context *cpuctx = __info;
+ struct perf_event_context *ctx = &cpuctx->ctx;
+ struct perf_event *event;

perf_pmu_rotate_stop(ctx->pmu);

- list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
- __perf_remove_from_context(event);
- list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
- __perf_remove_from_context(event);
+ raw_spin_lock(&ctx->lock);
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+
+ list_for_each_entry(event, &ctx->event_list, event_entry)
+		event->state = PERF_EVENT_STATE_OFF;
+
+ raw_spin_unlock(&ctx->lock);
}

static void perf_event_exit_cpu_context(int cpu)
{
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
int idx;

idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
- ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;

mutex_lock(&ctx->mutex);
- smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+ smp_call_function_single(cpu, __perf_event_exit_context, cpuctx, 1);
mutex_unlock(&ctx->mutex);
}
srcu_read_unlock(&pmus_srcu, idx);
@@ -7901,11 +7974,12 @@ static void perf_event_exit_cpu(int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);

+ perf_event_exit_cpu_context(cpu);
+
mutex_lock(&swhash->hlist_mutex);
swevent_hlist_release(swhash);
mutex_unlock(&swhash->hlist_mutex);

- perf_event_exit_cpu_context(cpu);
}
#else
static inline void perf_event_exit_cpu(int cpu) { }