Re: PERF_EVENT_IOC_SET_OUTPUT

From: Peter Zijlstra
Date: Mon Oct 07 2013 - 12:43:20 EST


On Thu, Oct 03, 2013 at 08:43:51AM +0200, Ingo Molnar wrote:
> If you find time to send an updated version that boots then I can try to
> trace it to figure out where it fails, if it still fails.

Can you give the below a spin?

---
Subject: perf: Fix the perf context switch optimization
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Mon Oct 7 17:12:48 CEST 2013

Currently we only optimize the context switch between two contexts that
have the same parent; this forgoes the optimization between parent and
child context, even though these contexts could be equivalent too.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
kernel/events/core.c | 64 ++++++++++++++++++++++++++++++++++++---------------
1 file changed, 46 insertions(+), 18 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_even
put_ctx(ctx->parent_ctx);
ctx->parent_ctx = NULL;
}
+ ctx->generation++;
}

static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event,
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
+
+ ctx->generation++;
}

/*
@@ -1313,6 +1316,8 @@ list_del_event(struct perf_event *event,
*/
if (event->state > PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_OFF;
+
+ ctx->generation++;
}

static void perf_group_detach(struct perf_event *event)
@@ -2149,22 +2154,38 @@ static void ctx_sched_out(struct perf_ev
}

/*
- * Test whether two contexts are equivalent, i.e. whether they
- * have both been cloned from the same version of the same context
- * and they both have the same number of enabled events.
- * If the number of enabled events is the same, then the set
- * of enabled events should be the same, because these are both
- * inherited contexts, therefore we can't access individual events
- * in them directly with an fd; we can only enable/disable all
- * events via prctl, or enable/disable all events in a family
- * via ioctl, which will have the same effect on both contexts.
+ * Test whether two contexts are equivalent, i.e. whether they have both been
+ * cloned from the same version of the same context.
+ *
+ * Equivalence is measured using a generation number in the context that is
+ * incremented on each modification to it; see unclone_ctx(), list_add_event()
+ * and list_del_event().
*/
static int context_equiv(struct perf_event_context *ctx1,
struct perf_event_context *ctx2)
{
- return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
- && ctx1->parent_gen == ctx2->parent_gen
- && !ctx1->pin_count && !ctx2->pin_count;
+ /* Pinning disables the swap optimization */
+ if (ctx1->pin_count || ctx2->pin_count)
+ return 0;
+
+ /* If ctx1 is the parent of ctx2 */
+ if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
+ return 1;
+
+ /* If ctx2 is the parent of ctx1 */
+ if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
+ return 1;
+
+ /*
+ * If ctx1 and ctx2 have the same parent; we flatten the parent
+ * hierarchy, see perf_event_init_context().
+ */
+ if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
+ ctx1->parent_gen == ctx2->parent_gen)
+ return 1;
+
+ /* Unmatched */
+ return 0;
}

static void __perf_event_sync_stat(struct perf_event *event,
@@ -2247,7 +2268,7 @@ static void perf_event_context_sched_out
{
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
- struct perf_event_context *parent;
+ struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;

@@ -2259,10 +2280,18 @@ static void perf_event_context_sched_out
return;

rcu_read_lock();
- parent = rcu_dereference(ctx->parent_ctx);
next_ctx = next->perf_event_ctxp[ctxn];
- if (parent && next_ctx &&
- rcu_dereference(next_ctx->parent_ctx) == parent) {
+ if (!next_ctx)
+ goto unlock;
+
+ parent = rcu_dereference(ctx->parent_ctx);
+ next_parent = rcu_dereference(next_ctx->parent_ctx);
+
+ /* If neither context have a parent context; they cannot be clones. */
+ if (!parent && !next_parent)
+ goto unlock;
+
+ if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
/*
* Looks like the two contexts are clones, so we might be
* able to optimize the context switch. We lock both
@@ -2290,6 +2319,7 @@ static void perf_event_context_sched_out
raw_spin_unlock(&next_ctx->lock);
raw_spin_unlock(&ctx->lock);
}
+unlock:
rcu_read_unlock();

if (do_switch) {
@@ -7128,7 +7158,6 @@ SYSCALL_DEFINE5(perf_event_open,
}

perf_install_in_context(ctx, event, event->cpu);
- ++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

@@ -7211,7 +7240,6 @@ perf_event_create_kernel_counter(struct
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
- ++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/