[RFC PATCH 1/1] kernel/events: Introduce IOC_COUNT_RECORDS
From: Naveen N. Rao
Date: Tue Jun 06 2017 - 11:17:56 EST
Many perf sideband events (context switches, namespaces, ...) are useful
by themselves without the need for subscribing to any overflow events.
However, it is not currently possible to subscribe to notifications when
such records are logged into the ring buffer. Introduce
PERF_EVENT_IOC_COUNT_RECORDS as a way to request this.
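With PERF_EVENT_IOC_COUNT_RECORDS set, PERF_EVENT_IOC_REFRESH takes the
number of records after which to generate a notification, rather than
the number of overflow events. PERF_EVENT_IOC_COUNT_RECORDS returns
-EINVAL for inherited or non-sampling events.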
Signed-off-by: Naveen N. Rao <naveen.n.rao@xxxxxxxxxxxxxxxxxx>
---
include/linux/perf_event.h | 1 +
include/uapi/linux/perf_event.h | 1 +
kernel/events/core.c | 16 +++++++++++++++-
kernel/events/ring_buffer.c | 9 +++++++++
4 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..016f2da2bba7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,6 +683,7 @@ struct perf_event {
struct irq_work pending;
atomic_t event_limit;
+ bool count_records;
/* address range filters */
struct perf_addr_filters_head addr_filters;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b1c0b187acfe..fb989ac71ded 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -408,6 +408,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_COUNT_RECORDS _IO ('$', 10)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..637064880b36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2674,6 +2674,16 @@ void perf_event_addr_filters_sync(struct perf_event *event)
}
EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
+static int _perf_event_count_records(struct perf_event *event)
+{
+ if (event->attr.inherit || !is_sampling_event(event))
+ return -EINVAL;
+
+ event->count_records = 1;
+
+ return 0;
+}
+
static int _perf_event_refresh(struct perf_event *event, int refresh)
{
/*
@@ -4699,6 +4709,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
func = _perf_event_reset;
break;
+ case PERF_EVENT_IOC_COUNT_RECORDS:
+ return _perf_event_count_records(event);
+
case PERF_EVENT_IOC_REFRESH:
return _perf_event_refresh(event, arg);
@@ -7342,7 +7355,8 @@ static int __perf_event_overflow(struct perf_event *event,
*/
event->pending_kill = POLL_IN;
- if (events && atomic_dec_and_test(&event->event_limit)) {
+ if (events && !event->count_records &&
+ atomic_dec_and_test(&event->event_limit)) {
ret = 1;
event->pending_kill = POLL_HUP;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480c63a2..9b9ca0608fed 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -126,6 +126,7 @@ __perf_output_begin(struct perf_output_handle *handle,
u64 id;
u64 lost;
} lost_event;
+ int events = atomic_read(&event->event_limit);
rcu_read_lock();
/*
@@ -197,6 +198,14 @@ __perf_output_begin(struct perf_output_handle *handle,
if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
local_add(rb->watermark, &rb->wakeup);
+ if (events && event->count_records &&
+ atomic_dec_and_test(&event->event_limit)) {
+ event->pending_kill = POLL_HUP;
+ local_inc(&rb->wakeup);
+
+ perf_event_disable_inatomic(event);
+ }
+
page_shift = PAGE_SHIFT + page_order(rb);
handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
--
2.12.2