[RFC PATCH 08/17] perf: Allow inheritance for detached events

From: Alexander Shishkin
Date: Tue Sep 05 2017 - 09:42:28 EST


This enables inheritance for detached events. Unlike traditional events,
these do not have parents: inheritance produces a new independent event
with the same attributes. If the 'parent' event has a ring buffer, so will
the new event. Because of the mlock accounting, this buffer allocation
may fail, which in turn will fail the fork() call in the parent task --
something to be aware of.

This also effectively disables context cloning, because unlike
traditional events, each of these will have its own ring buffer, so the
context switch optimization can't work.

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/linux/perf_event.h | 1 +
kernel/events/core.c | 64 ++++++++++++++++++++++++++++++++++++++--------
2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b7939e8811..0b45abad12 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -780,6 +780,7 @@ struct perf_event_context {
int nr_stat;
int nr_freq;
int rotate_disable;
+ int clone_disable;
atomic_t refcount;
struct task_struct *task;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d62ab2d1de..89c14644df 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -259,11 +259,12 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
.data = data,
};

- if (!event->parent) {
+ if (!event->parent && !ctx->clone_disable) {
/*
* If this is a !child event, we must hold ctx::mutex to
* stabilize the the event->ctx relation. See
* perf_event_ctx_lock().
+ * Note: detached events' ctx is always stable.
*/
lockdep_assert_held(&ctx->mutex);
}
@@ -10169,6 +10170,7 @@ SYSCALL_DEFINE5(perf_event_open,
atomic_long_inc(&event->refcount);
atomic_inc(&event->mmap_count);

+ ctx->clone_disable = 1;
event_file->private_data = event;
}

@@ -10699,14 +10701,18 @@ static void perf_free_event(struct perf_event *event,
{
struct perf_event *parent = event->parent;

- if (WARN_ON_ONCE(!parent))
- return;
+ /*
+ * If a parentless event turns up here, it has to be a detached
+ * event, in case of inherit_event() failure.
+ */

- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
+ if (parent) {
+ mutex_lock(&parent->child_mutex);
+ list_del_init(&event->child_list);
+ mutex_unlock(&parent->child_mutex);

- put_event(parent);
+ put_event(parent);
+ }

raw_spin_lock_irq(&ctx->lock);
perf_group_detach(event);
@@ -10803,6 +10809,7 @@ inherit_event(struct perf_event *parent_event,
struct perf_event_context *child_ctx)
{
enum perf_event_active_state parent_state = parent_event->state;
+ bool detached = is_detached_event(parent_event);
struct perf_event *child_event;
unsigned long flags;

@@ -10815,10 +10822,16 @@ inherit_event(struct perf_event *parent_event,
if (parent_event->parent)
parent_event = parent_event->parent;

+ /*
+ * Detached events don't have parents; instead, inheritance
+ * creates a new independent event, which is accessible via
+ * tracefs.
+ */
child_event = perf_event_alloc(&parent_event->attr,
parent_event->cpu,
child,
- group_leader, parent_event,
+ group_leader,
+ detached ? NULL : parent_event,
NULL, NULL, -1);
if (IS_ERR(child_event))
return child_event;
@@ -10864,6 +10877,29 @@ inherit_event(struct perf_event *parent_event,
child_event->overflow_handler_context
= parent_event->overflow_handler_context;

+ /*
+ * For per-task detached events with ring buffers, set_output doesn't
+ * make sense, but we can allocate a new buffer here. CPU-wide events
+ * don't have inheritance.
+ */
+ if (detached) {
+ int err;
+
+ err = perf_event_detach(child_event, child, NULL);
+ if (err) {
+ perf_free_event(child_event, child_ctx);
+ mutex_unlock(&parent_event->child_mutex);
+ put_event(parent_event);
+ return NULL;
+ }
+
+ /*
+ * Inherited detached events don't use their parent's
+ * ring buffer, so cloning can't work for them.
+ */
+ child_ctx->clone_disable = 1;
+ }
+
/*
* Precalculate sample_data sizes
*/
@@ -10878,11 +10914,17 @@ inherit_event(struct perf_event *parent_event,
raw_spin_unlock_irqrestore(&child_ctx->lock, flags);

/*
- * Link this into the parent event's child list
+ * Link this into the parent event's child list, unless
+ * it's a detached event, see above.
*/
- list_add_tail(&child_event->child_list, &parent_event->child_list);
+ if (!detached)
+ list_add_tail(&child_event->child_list,
+ &parent_event->child_list);
mutex_unlock(&parent_event->child_mutex);

+ if (detached)
+ put_event(parent_event);
+
return child_event;
}

@@ -11042,7 +11084,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)

child_ctx = child->perf_event_ctxp[ctxn];

- if (child_ctx && inherited_all) {
+ if (child_ctx && inherited_all && !child_ctx->clone_disable) {
/*
* Mark the child context as a clone of the parent
* context, or of whatever the parent is a clone of.
--
2.14.1