[PATCH] perf core: Introduce new ioctl options to pause and resume ring buffer

From: Wang Nan
Date: Mon Jan 18 2016 - 06:52:58 EST


Add two ioctl() commands, PERF_EVENT_IOC_PAUSE_OUTPUT and
PERF_EVENT_IOC_RESUME_OUTPUT, to pause and resume ring-buffer output.

In some situations we want to read from the ring buffer only once we
have ensured that nothing can write to it during the read. Without
this patch we would have to turn off all events attached to the ring
buffer to achieve that. This patch is a prerequisite for supporting an
overwritable ring buffer with TAILSIZE selected.

Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
Cc: He Kuang <hekuang@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Brendan Gregg <brendan.d.gregg@xxxxxxxxx>
Cc: David S. Miller <davem@xxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Zefan Li <lizefan@xxxxxxxxxx>
Cc: pi3orama@xxxxxxx
---
include/uapi/linux/perf_event.h | 2 ++
kernel/events/core.c | 14 ++++++++++++++
kernel/events/internal.h | 11 +++++++++++
kernel/events/ring_buffer.c | 4 +++-
4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4e8dde8..9508070 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -402,6 +402,8 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IO ('$', 9) /* pause output into the event's ring buffer */
+#define PERF_EVENT_IOC_RESUME_OUTPUT _IO ('$', 10) /* resume output into the event's ring buffer */

enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2d59b59..d5a0c34 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4241,6 +4241,20 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);

+ case PERF_EVENT_IOC_PAUSE_OUTPUT: /* pause output into event->rb */
+ case PERF_EVENT_IOC_RESUME_OUTPUT: { /* resume output into event->rb */
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb) { /* test the snapshot; a second read of event->rb may differ */
+ rcu_read_unlock();
+ return -EINVAL; /* no ring buffer attached to this event */
+ }
+ rb_toggle_paused(rb, cmd == PERF_EVENT_IOC_PAUSE_OUTPUT);
+ rcu_read_unlock();
+ return 0;
+ }
default:
return -ENOTTY;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2bbad9c..6a93d1b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -18,6 +18,7 @@ struct ring_buffer {
#endif
int nr_pages; /* nr of data pages */
int overwrite; /* can overwrite itself */
+ int paused; /* nonzero while output is paused; checked in perf_output_begin() */

atomic_t poll; /* POLL_ for wakeups */

@@ -65,6 +66,16 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
rb_free(rb);
}

+/*
+ * Set the paused state of @rb: pausing sets rb->paused to 1, resuming
+ * clears it to 0. A ring buffer without data pages (nr_pages == 0)
+ * stays paused even when a resume is requested.
+ */
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+ rb->paused = (pause || !rb->nr_pages) ? 1 : 0;
+}
+
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 5f8bd89..11a1676 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -125,7 +125,7 @@ int perf_output_begin(struct perf_output_handle *handle,
if (unlikely(!rb))
goto out;

- if (unlikely(!rb->nr_pages))
+ if (unlikely(rb->paused))
goto out;

handle->rb = rb;
@@ -245,6 +245,8 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
init_irq_work(&rb->irq_work, rb_irq_work);
+
+ rb->paused = rb->nr_pages ? 0 : 1;
}

static void ring_buffer_put_async(struct ring_buffer *rb)
--
1.8.3.4