[tip:perf/core] perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer

From: tip-bot for Wang Nan
Date: Thu Mar 31 2016 - 05:27:25 EST


Commit-ID: 86e7972f690c1017fd086cdfe53d8524e68c661c
Gitweb: http://git.kernel.org/tip/86e7972f690c1017fd086cdfe53d8524e68c661c
Author: Wang Nan <wangnan0@xxxxxxxxxx>
AuthorDate: Mon, 28 Mar 2016 06:41:29 +0000
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Thu, 31 Mar 2016 10:30:45 +0200

perf/ring_buffer: Introduce new ioctl options to pause and resume the ring-buffer

Add new ioctl() to pause/resume ring-buffer output.

In some situations we want to read from the ring-buffer only when we
ensure nothing can write to the ring-buffer during reading. Without
this patch we have to turn off all events attached to this ring-buffer
to achieve this.

This patch is a prerequisite to enable overwrite support for the
perf ring-buffer support. Following commits will introduce new methods
support reading from overwrite ring buffer. Before reading, caller
must ensure the ring buffer is frozen, or the reading is unreliable.

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: <pi3orama@xxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Brendan Gregg <brendan.d.gregg@xxxxxxxxx>
Cc: He Kuang <hekuang@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Vince Weaver <vincent.weaver@xxxxxxxxx>
Cc: Zefan Li <lizefan@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/1459147292-239310-2-git-send-email-wangnan0@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/uapi/linux/perf_event.h | 1 +
kernel/events/core.c | 13 +++++++++++++
kernel/events/internal.h | 9 +++++++++
kernel/events/ring_buffer.c | 12 +++++++++++-
4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..a3c1903 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -401,6 +401,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)

enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 243df4b..51386e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4379,6 +4379,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);

+ case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb || !rb->nr_pages) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ rb_toggle_paused(rb, !!arg);
+ rcu_read_unlock();
+ return 0;
+ }
default:
return -ENOTTY;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2b229fd..2d67327 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -17,6 +17,7 @@ struct ring_buffer {
#endif
int nr_pages; /* nr of data pages */
int overwrite; /* can overwrite itself */
+ int paused; /* can write into ring buffer */

atomic_t poll; /* POLL_ for wakeups */

@@ -64,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
rb_free(rb);
}

+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+ if (!pause && rb->nr_pages)
+ rb->paused = 0;
+ else
+ rb->paused = 1;
+}
+
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 0ed4555..72d8127 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,8 +125,11 @@ int perf_output_begin(struct perf_output_handle *handle,
if (unlikely(!rb))
goto out;

- if (unlikely(!rb->nr_pages))
+ if (unlikely(rb->paused)) {
+ if (rb->nr_pages)
+ local_inc(&rb->lost);
goto out;
+ }

handle->rb = rb;
handle->event = event;
@@ -241,6 +244,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)

INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
+
+ /*
+ * perf_output_begin() only checks rb->paused, therefore
+ * rb->paused must be true if we have no pages for output.
+ */
+ if (!rb->nr_pages)
+ rb->paused = 1;
}

/*