[PATCH 05/21] perf: Add event toggle sys_perf_event_open interface

From: Jiri Olsa
Date: Wed Sep 25 2013 - 08:55:56 EST


Add a perf interface that allows the creation of 'toggle' events,
which can enable or disable another event. Whenever the toggle
event is triggered (overflows), it toggles the other event's
state, either starting or stopping it.

The goal is to be able to create toggling tracepoint events
to enable and disable HW counters, but the interface is generic
enough to be used for any kind of event.

The interface to create a toggle event is similar to the one
for defining an event group. Use the perf syscall with:

flags - PERF_FLAG_TOGGLE_ON or PERF_FLAG_TOGGLE_OFF
group_fd - event (or group) fd to be toggled

The created event will toggle ON (start) or OFF (stop) the event
specified via group_fd.

Obviously, this way it's not possible for a toggle event to be
part of a group other than as the group leader. This will become
possible via an ioctl introduced in the next patch.

Original-patch-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Corey Ashford <cjashfor@xxxxxxxxxxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
include/linux/perf_event.h | 9 +++
include/uapi/linux/perf_event.h | 3 +
kernel/events/core.c | 158 ++++++++++++++++++++++++++++++++++++++--
3 files changed, 164 insertions(+), 6 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 866e85c..6ede25c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -289,6 +289,12 @@ struct swevent_hlist {
struct perf_cgroup;
struct ring_buffer;

+enum perf_event_toggle_flag {
+ PERF_TOGGLE_NONE = 0,
+ PERF_TOGGLE_ON = 1,
+ PERF_TOGGLE_OFF = 2,
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -414,6 +420,9 @@ struct perf_event {
int cgrp_defer_enabled;
#endif

+ struct perf_event *toggled_event;
+ enum perf_event_toggle_flag toggle_flag;
+ int paused;
#endif /* CONFIG_PERF_EVENTS */
};

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index ca1d90b..ecb0474 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -694,6 +694,9 @@ enum perf_callchain_context {
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
+#define PERF_FLAG_TOGGLE_ON (1U << 3)
+#define PERF_FLAG_TOGGLE_OFF (1U << 4)
+

union perf_mem_data_src {
__u64 val;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e8674e4..40c792d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -44,6 +44,8 @@

#include <asm/irq_regs.h>

+#define PERF_FLAG_TOGGLE (PERF_FLAG_TOGGLE_ON | PERF_FLAG_TOGGLE_OFF)
+
struct remote_function_call {
struct task_struct *p;
int (*func)(void *info);
@@ -119,7 +121,9 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)

#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
PERF_FLAG_FD_OUTPUT |\
- PERF_FLAG_PID_CGROUP)
+ PERF_FLAG_PID_CGROUP |\
+ PERF_FLAG_TOGGLE_ON |\
+ PERF_FLAG_TOGGLE_OFF)

/*
* branch priv levels that need permission checks
@@ -1358,6 +1362,25 @@ out:
perf_event__header_size(tmp);
}

+static void put_event(struct perf_event *event);
+
+static void __perf_event_toggle_detach(struct perf_event *event)
+{
+ struct perf_event *toggled_event = event->toggled_event;
+
+ event->toggle_flag = PERF_TOGGLE_NONE;
+ event->overflow_handler = NULL;
+ event->toggled_event = NULL;
+
+ put_event(toggled_event);
+}
+
+static void perf_event_toggle_detach(struct perf_event *event)
+{
+ if (event->toggle_flag > PERF_TOGGLE_NONE)
+ __perf_event_toggle_detach(event);
+}
+
static inline int
event_filter_match(struct perf_event *event)
{
@@ -1646,6 +1669,7 @@ event_sched_in(struct perf_event *event,
struct perf_event_context *ctx)
{
u64 tstamp = perf_event_time(event);
+ int add_flags = PERF_EF_START;

if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
@@ -1665,7 +1689,10 @@ event_sched_in(struct perf_event *event,
*/
smp_wmb();

- if (event->pmu->add(event, PERF_EF_START)) {
+ if (event->paused)
+ add_flags = 0;
+
+ if (event->pmu->add(event, add_flags)) {
event->state = PERF_EVENT_STATE_INACTIVE;
event->oncpu = -1;
return -EAGAIN;
@@ -2723,7 +2750,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
event->pmu->start(event, 0);
}

- if (!event->attr.freq || !event->attr.sample_freq)
+ if (!event->attr.freq || !event->attr.sample_freq || event->paused)
continue;

/*
@@ -3240,7 +3267,7 @@ int perf_event_release_kernel(struct perf_event *event)
raw_spin_unlock_irq(&ctx->lock);
perf_remove_from_context(event);
mutex_unlock(&ctx->mutex);
-
+ perf_event_toggle_detach(event);
free_event(event);

return 0;
@@ -5231,6 +5258,72 @@ static void perf_log_throttle(struct perf_event *event, int enable)
}

/*
+ * TODO:
+ * - fix race when interrupting event_sched_in/event_sched_out
+ * - fix race against other toggler
+ * - fix race against other callers of ->stop/start (adjust period/freq)
+ */
+static void perf_event_toggle(struct perf_event *event,
+ enum perf_event_toggle_flag flag)
+{
+ unsigned long flags;
+ bool active;
+
+ /*
+ * Prevent from races against event->add/del through
+ * preempt_schedule_irq() or enable/disable IPIs
+ */
+ local_irq_save(flags);
+
+ /* Could be out of HW counter. */
+ active = event->state == PERF_EVENT_STATE_ACTIVE;
+
+ switch (flag) {
+ case PERF_TOGGLE_ON:
+ if (!event->paused)
+ break;
+ if (active)
+ event->pmu->start(event, PERF_EF_RELOAD);
+ event->paused = false;
+ break;
+ case PERF_TOGGLE_OFF:
+ if (event->paused)
+ break;
+ if (active)
+ event->pmu->stop(event, PERF_EF_UPDATE);
+ event->paused = true;
+ break;
+ case PERF_TOGGLE_NONE:
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+static void
+perf_event_toggle_overflow(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct perf_event *toggled_event;
+
+ if (!event->toggle_flag)
+ return;
+
+ toggled_event = event->toggled_event;
+
+ if (WARN_ON_ONCE(!toggled_event))
+ return;
+
+ perf_pmu_disable(toggled_event->pmu);
+
+ perf_event_toggle(toggled_event, event->toggle_flag);
+ perf_event_output(event, data, regs);
+
+ perf_pmu_enable(toggled_event->pmu);
+}
+
+/*
* Generic event overflow handling, sampling.
*/

@@ -6887,6 +6980,43 @@ out:
return ret;
}

+static enum perf_event_toggle_flag get_toggle_flag(unsigned long flags)
+{
+ if ((flags & PERF_FLAG_TOGGLE) == PERF_FLAG_TOGGLE_ON)
+ return PERF_TOGGLE_ON;
+ else if ((flags & PERF_FLAG_TOGGLE) == PERF_FLAG_TOGGLE_OFF)
+ return PERF_TOGGLE_OFF;
+
+ return PERF_TOGGLE_NONE;
+}
+
+static int
+perf_event_set_toggle(struct perf_event *event,
+ struct perf_event *toggled_event,
+ struct perf_event_context *ctx,
+ unsigned long flags)
+{
+ if (WARN_ON_ONCE(!(flags & PERF_FLAG_TOGGLE)))
+ return -EINVAL;
+
+ /* It's either ON or OFF. */
+ if ((flags & PERF_FLAG_TOGGLE) == PERF_FLAG_TOGGLE)
+ return -EINVAL;
+
+ /* Allow only same cpu, */
+ if (toggled_event->cpu != event->cpu)
+ return -EINVAL;
+
+ /* or same task. */
+ if (toggled_event->ctx->task != ctx->task)
+ return -EINVAL;
+
+ event->overflow_handler = perf_event_toggle_overflow;
+ event->toggle_flag = get_toggle_flag(flags);
+ event->toggled_event = toggled_event;
+ return 0;
+}
+
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -6900,6 +7030,7 @@ SYSCALL_DEFINE5(perf_event_open,
pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
struct perf_event *group_leader = NULL, *output_event = NULL;
+ struct perf_event *toggled_event = NULL;
struct perf_event *event, *sibling;
struct perf_event_attr attr;
struct perf_event_context *ctx;
@@ -6949,7 +7080,9 @@ SYSCALL_DEFINE5(perf_event_open,
group_leader = group.file->private_data;
if (flags & PERF_FLAG_FD_OUTPUT)
output_event = group_leader;
- if (flags & PERF_FLAG_FD_NO_GROUP)
+ if (flags & PERF_FLAG_TOGGLE)
+ toggled_event = group_leader;
+ if (flags & (PERF_FLAG_FD_NO_GROUP|PERF_FLAG_TOGGLE))
group_leader = NULL;
}

@@ -7060,10 +7193,20 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_context;
}

+ if (toggled_event) {
+ err = -EINVAL;
+ if (!atomic_long_inc_not_zero(&toggled_event->refcount))
+ goto err_context;
+
+ err = perf_event_set_toggle(event, toggled_event, ctx, flags);
+ if (err)
+ goto err_toggle;
+ }
+
event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
if (IS_ERR(event_file)) {
err = PTR_ERR(event_file);
- goto err_context;
+ goto err_toggle;
}

if (move_group) {
@@ -7131,6 +7274,9 @@ SYSCALL_DEFINE5(perf_event_open,
fd_install(event_fd, event_file);
return event_fd;

+err_toggle:
+ if (toggled_event)
+ put_event(toggled_event);
err_context:
perf_unpin_context(ctx);
put_ctx(ctx);
--
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/