Re: [PATCH 10/46] perf core: Introduce new ioctl options to pause and resume ring buffer

From: Arnaldo Carvalho de Melo
Date: Mon Feb 29 2016 - 10:39:40 EST


Em Fri, Feb 26, 2016 at 09:31:58AM +0000, Wang Nan escreveu:
> Add new ioctl() to pause/resume ring-buffer output.
>
> In some situations we want to read from ring buffer only when we
> ensure nothing can write to the ring buffer during reading. Without
> this patch we have to turn off all events attached to this ring buffer
> to achieve this.
>
> This patch is for supporting overwrite ring buffer. Following
> commits will introduce new methods to support reading from overwrite ring
> buffer. Before reading, the caller must ensure the ring buffer is frozen, or
> the reading is unreliable.

Peter, have you had the chance to look at this and the other kernel
bits in this kit?

- Arnaldo

> Signed-off-by: Wang Nan <wangnan0@xxxxxxxxxx>
> Cc: He Kuang <hekuang@xxxxxxxxxx>
> Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> Cc: Brendan Gregg <brendan.d.gregg@xxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Zefan Li <lizefan@xxxxxxxxxx>
> Cc: pi3orama@xxxxxxx
> ---
> include/uapi/linux/perf_event.h | 1 +
> kernel/events/core.c | 13 +++++++++++++
> kernel/events/internal.h | 11 +++++++++++
> kernel/events/ring_buffer.c | 7 ++++++-
> 4 files changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 1afe962..a3c1903 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -401,6 +401,7 @@ struct perf_event_attr {
> #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
> #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
> #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
> +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
>
> enum perf_event_ioc_flags {
> PERF_IOC_FLAG_GROUP = 1U << 0,
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 94c47e3..a7075ae 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4231,6 +4231,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
> case PERF_EVENT_IOC_SET_BPF:
> return perf_event_set_bpf_prog(event, arg);
>
> + case PERF_EVENT_IOC_PAUSE_OUTPUT: {
> + struct ring_buffer *rb;
> +
> + rcu_read_lock();
> + rb = rcu_dereference(event->rb);
> + if (!event->rb) {
> + rcu_read_unlock();
> + return -EINVAL;
> + }
> + rb_toggle_paused(rb, !!arg);
> + rcu_read_unlock();
> + return 0;
> + }
> default:
> return -ENOTTY;
> }
> diff --git a/kernel/events/internal.h b/kernel/events/internal.h
> index 2bbad9c..6a93d1b 100644
> --- a/kernel/events/internal.h
> +++ b/kernel/events/internal.h
> @@ -18,6 +18,7 @@ struct ring_buffer {
> #endif
> int nr_pages; /* nr of data pages */
> int overwrite; /* can overwrite itself */
> + int paused; /* can write into ring buffer */
>
> atomic_t poll; /* POLL_ for wakeups */
>
> @@ -65,6 +66,16 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
> rb_free(rb);
> }
>
> +static inline void
> +rb_toggle_paused(struct ring_buffer *rb,
> + bool pause)
> +{
> + if (!pause && rb->nr_pages)
> + rb->paused = 0;
> + else
> + rb->paused = 1;
> +}
> +
> extern struct ring_buffer *
> rb_alloc(int nr_pages, long watermark, int cpu, int flags);
> extern void perf_event_wakeup(struct perf_event *event);
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 1faad2c..22e1a47 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -125,8 +125,11 @@ int perf_output_begin(struct perf_output_handle *handle,
> if (unlikely(!rb))
> goto out;
>
> - if (unlikely(!rb->nr_pages))
> + if (unlikely(rb->paused)) {
> + if (rb->nr_pages)
> + local_inc(&rb->lost);
> goto out;
> + }
>
> handle->rb = rb;
> handle->event = event;
> @@ -244,6 +247,8 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
> INIT_LIST_HEAD(&rb->event_list);
> spin_lock_init(&rb->event_lock);
> init_irq_work(&rb->irq_work, rb_irq_work);
> +
> + rb->paused = rb->nr_pages ? 0 : 1;
> }
>
> static void ring_buffer_put_async(struct ring_buffer *rb)
> --
> 1.8.3.4