[RFC PATCH 06/17] perf: Add buffers to the detached events

From: Alexander Shishkin
Date: Tue Sep 05 2017 - 09:41:43 EST


Detached events make much more sense with ring buffers, which the user
can mmap and read a snapshot of. Unlike with normal perf events, these
ring buffers are allocated by the perf syscall; the sizes of the data and
aux areas are specified in the event attribute.

These ring buffers can only be mmapped read-only.

Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
include/uapi/linux/perf_event.h | 3 +++
kernel/events/core.c | 19 ++++++++++++++++
kernel/events/internal.h | 2 ++
kernel/events/ring_buffer.c | 50 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 74 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 89355584fa..3d64d9ea80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -297,6 +297,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* add: detached_* */

/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -415,6 +416,8 @@ struct perf_event_attr {
__u32 aux_watermark;
__u16 sample_max_stack;
__u16 __reserved_2; /* align to __u64 */
+ __u32 detached_nr_pages;
+ __u32 detached_aux_nr_pages;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 320070410d..fef1f97974 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4185,6 +4185,9 @@ static void _free_event(struct perf_event *event)
tracefs_remove(event->dent);

event->attach_state &= ~PERF_ATTACH_DETACHED;
+
+ ring_buffer_unaccount(event->rb, false);
+ rb_free_detached(event->rb, event);
}

if (event->rb) {
@@ -5012,6 +5015,10 @@ static int perf_mmap_fault(struct vm_fault *vmf)
int ret = VM_FAULT_SIGBUS;

if (vmf->flags & FAULT_FLAG_MKWRITE) {
+ /* detached events can only be mapped read-only */
+ if (event->dent)
+ return ret;
+
if (vmf->pgoff == 0)
ret = 0;
return ret;
@@ -9420,6 +9427,7 @@ static int perf_event_detach(struct perf_event *event, struct task_struct *task,
struct mm_struct *mm)
{
char *filename;
+ int err;

filename = kasprintf(GFP_KERNEL, "%s:%x.event",
task ? "task" : "cpu",
@@ -9435,6 +9443,13 @@ static int perf_event_detach(struct perf_event *event, struct task_struct *task,
if (!event->dent)
return -ENOMEM;

+ err = rb_alloc_detached(event);
+ if (err) {
+ tracefs_remove(event->dent);
+ event->dent = NULL;
+ return err;
+ }
+
return 0;
}
/*
@@ -10017,6 +10032,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (output_event || (group_fd != -1))
goto err_task;

+ if (!attr.detached_nr_pages)
+ goto err_task;
+
detached = 1;
}

@@ -10174,6 +10192,7 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;

atomic_long_inc(&event->refcount);
+ atomic_inc(&event->mmap_count);

event_file->private_data = event;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 59136a0e98..8e267d8faa 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,6 +82,8 @@ extern void perf_event_wakeup(struct perf_event *event);
extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
pgoff_t pgoff, int nr_pages, long watermark, int flags);
extern void rb_free_aux(struct ring_buffer *rb);
+extern int rb_alloc_detached(struct perf_event *event);
+extern void rb_free_detached(struct ring_buffer *rb, struct perf_event *event);
extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
extern void ring_buffer_put(struct ring_buffer *rb);

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index d36f169cae..b4d7841025 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -760,6 +760,56 @@ void rb_free_aux(struct ring_buffer *rb)
}
}

+/*
+ * Allocate a ring_buffer sized by attr.detached_{,aux_}nr_pages and publish it
+ * in event->rb; returns 0 or the rb_alloc()/rb_alloc_aux() error. There's one
+ * ring_buffer per detached event and vice versa: no ring_buffer_attach().
+ */
+int rb_alloc_detached(struct perf_event *event)
+{
+ int aux_nr_pages = event->attr.detached_aux_nr_pages;
+ int nr_pages = event->attr.detached_nr_pages;
+ struct ring_buffer *rb;
+ int ret, pgoff = nr_pages + 1; /* aux offset; presumably past header + data pages, TODO confirm */
+
+ /*
+ * Use overwrite mode (!RING_BUFFER_WRITABLE) for both data and aux
+ * areas as we don't want wakeups or interrupts.
+ */
+ rb = rb_alloc(NULL, nr_pages, 0, event->cpu, 0);
+ if (IS_ERR(rb))
+ return PTR_ERR(rb);
+
+ ret = rb_alloc_aux(rb, event, pgoff, aux_nr_pages, 0, 0);
+ if (ret) {
+ rb_free(rb);
+ return ret;
+ }
+
+ atomic_set(&rb->mmap_count, 1);
+ if (aux_nr_pages)
+ atomic_set(&rb->aux_mmap_count, 1);
+
+ /*
+ * Detached events don't need ring buffer wakeups, therefore we don't
+ * use ring_buffer_attach() here and event->rb_entry stays empty.
+ */
+ rcu_assign_pointer(event->rb, rb);
+
+ return 0;
+}
+
+void rb_free_detached(struct ring_buffer *rb, struct perf_event *event)
+{
+ /* Tear down a detached event's buffer; rb must be down to its last reference */
+ WARN_ON_ONCE(atomic_read(&rb->refcount) != 1);
+
+ atomic_set(&rb->aux_mmap_count, 0);
+ rcu_assign_pointer(event->rb, NULL);
+ rb_free_aux(rb);
+ rb_free(rb);
+}
+
#ifndef CONFIG_PERF_USE_VMALLOC

/*
--
2.14.1