Re: [PATCH 1/3] perf: Do not allow to create kernel events without handler

From: Peter Zijlstra
Date: Fri Aug 01 2014 - 10:57:10 EST


On Fri, Aug 01, 2014 at 02:33:00PM +0200, Jiri Olsa wrote:
> Force kernel events to specify the handler, because
> there's no use for kernel perf event without it.
>

I think I found a reason, although there is currently no such user:
simple counting events don't have overflow handlers at all.

I think I did one once, but never merged that code because it was a quick
dev hack to create nice changelog numbers, etc.

/me goes dig... found it:


---
include/linux/perf_event.h | 1
kernel/events/core.c | 22 +++++++-
kernel/sched/clock.c | 118 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 138 insertions(+), 3 deletions(-)

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -561,6 +561,7 @@ extern void perf_pmu_migrate_context(str
int src_cpu, int dst_cpu);
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
+extern u64 perf_event_read(struct perf_event *event);


struct perf_sample_data {
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2973,15 +2973,31 @@ static inline u64 perf_event_count(struc
return local64_read(&event->count) + atomic64_read(&event->child_count);
}

-static u64 perf_event_read(struct perf_event *event)
+u64 perf_event_read(struct perf_event *event)
{
/*
* If event is enabled and currently active on a CPU, update the
* value in the event structure:
*/
if (event->state == PERF_EVENT_STATE_ACTIVE) {
- smp_call_function_single(event->oncpu,
- __perf_event_read, event, 1);
+ /*
+ * If the event is for the current task, it's guaranteed that we
+ * never need the cross cpu call, and therefore can allow this
+ * to be called with IRQs disabled.
+ *
+ * Avoids the warning otherwise generated by
+ * smp_call_function_single().
+ */
+ if (event->ctx->task == current) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __perf_event_read(event);
+ local_irq_restore(flags);
+ } else {
+ smp_call_function_single(event->oncpu,
+ __perf_event_read, event, 1);
+ }
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -387,3 +387,121 @@ u64 local_clock(void)

EXPORT_SYMBOL_GPL(cpu_clock);
EXPORT_SYMBOL_GPL(local_clock);
+
+#include <linux/err.h>
+#include <linux/perf_event.h>
+
+static char sched_clock_cache[12*1024*1024]; /* 12M l3 cache: scratch buffer that sched_clock_wipe_cache() overwrites before each cold measurement */
+static struct perf_event *__sched_clock_cycles; /* CPU-cycles counter event: created in sched_clock_bench(), read through sched_clock_cycles() */
+
+/*
+ * Return the current value of the benchmark's cycle counter, i.e. whatever
+ * perf_event_read() reports for the event stored in __sched_clock_cycles.
+ */
+static __init u64 sched_clock_cycles(void)
+{
+	struct perf_event *counter = __sched_clock_cycles;
+
+	return perf_event_read(counter);
+}
+
+/*
+ * Store zero to every byte of sched_clock_cache, front to back, using one
+ * ACCESS_ONCE() store per byte.  Called before each "cold" measurement; the
+ * buffer is sized after a 12M L3 so that walking it should displace
+ * whatever was cached before.
+ */
+static __init noinline void sched_clock_wipe_cache(void)
+{
+	char *byte = sched_clock_cache;
+	char *end = sched_clock_cache + sizeof(sched_clock_cache);
+
+	while (byte != end) {
+		ACCESS_ONCE(*byte) = 0;
+		byte++;
+	}
+}
+
+/*
+ * Measure a single cache-cold call of @clock.
+ *
+ * With IRQs disabled: wipe the cache, sample the cycle counter, call @clock
+ * once (its return value is discarded) and sample the counter again.
+ *
+ * Returns the difference between the two counter samples.
+ */
+static __always_inline u64 cache_cold_clock(u64 (*clock)(void))
+{
+	u64 start, elapsed;
+
+	local_irq_disable();
+	sched_clock_wipe_cache();
+	start = sched_clock_cycles();
+	(void)clock();
+	elapsed = sched_clock_cycles() - start;
+	local_irq_enable();
+
+	return elapsed;
+}
+
+/*
+ * Run the benchmark once and report the results through printk():
+ *
+ *  - the current value of sched_clock_stable;
+ *  - "(cold)" numbers: cycles summed over 1000 cache_cold_clock() runs of
+ *    sched_clock() and of local_clock();
+ *  - "(warm)" numbers: cycles spent in 1000 back-to-back calls of
+ *    sched_clock(), local_clock() and rdtscll(), each measured with IRQs
+ *    disabled and after one untimed warm-up call.
+ *
+ * The cycle counts are u64 and are therefore printed with %llu.
+ */
+static __init void do_bench(void)
+{
+	u64 cycles;
+	u64 tmp;
+	int i;
+
+	printk("sched_clock_stable: %d\n", sched_clock_stable);
+
+	/* Cold: every iteration wipes the cache before the timed call. */
+	cycles = 0;
+	for (i = 0; i < 1000; i++)
+		cycles += cache_cold_clock(&sched_clock);
+
+	printk("(cold) sched_clock: %llu\n", cycles);
+
+	cycles = 0;
+	for (i = 0; i < 1000; i++)
+		cycles += cache_cold_clock(&local_clock);
+
+	printk("(cold) local_clock: %llu\n", cycles);
+
+	/*
+	 * Warm: one untimed call first, then time 1000 calls in a row.  The
+	 * results go through ACCESS_ONCE(tmp) so the calls are not dropped.
+	 */
+	local_irq_disable();
+	ACCESS_ONCE(tmp) = sched_clock();
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		ACCESS_ONCE(tmp) = sched_clock();
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) sched_clock: %llu\n", cycles);
+
+	local_irq_disable();
+	ACCESS_ONCE(tmp) = local_clock();
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		ACCESS_ONCE(tmp) = local_clock();
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) local_clock: %llu\n", cycles);
+
+	/* Same warm loop around rdtscll() for comparison with the two clocks. */
+	local_irq_disable();
+	rdtscll(ACCESS_ONCE(tmp));
+
+	cycles = sched_clock_cycles();
+
+	for (i = 0; i < 1000; i++)
+		rdtscll(ACCESS_ONCE(tmp));
+
+	cycles = sched_clock_cycles() - cycles;
+	local_irq_enable();
+
+	printk("(warm) rdtsc: %llu\n", cycles);
+}
+
+/*
+ * Late initcall: create a pinned CPU-cycles counter for the current task,
+ * run do_bench() once with sched_clock_stable forced to 1 and once with it
+ * forced to 0, then restore sched_clock_stable to the value it had on entry
+ * and release the counter.
+ *
+ * Returns 0 on success, or the error encoded in the pointer returned by
+ * perf_event_create_kernel_counter() if the counter could not be created.
+ */
+static __init int sched_clock_bench(void)
+{
+	struct perf_event_attr perf_attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.size = sizeof(struct perf_event_attr),
+		.pinned = 1,
+	};
+	/* do_bench() prints sched_clock_stable with %d, so an int can hold it. */
+	int stable = sched_clock_stable;
+
+	__sched_clock_cycles = perf_event_create_kernel_counter(&perf_attr, -1,
+							current, NULL, NULL);
+	/*
+	 * Failure is reported as an ERR_PTR; bail out rather than passing it
+	 * on to perf_event_read() and perf_event_release_kernel().
+	 */
+	if (IS_ERR(__sched_clock_cycles))
+		return PTR_ERR(__sched_clock_cycles);
+
+	sched_clock_stable = 1;
+	do_bench();
+
+	sched_clock_stable = 0;
+	do_bench();
+
+	/* Restore the pre-benchmark setting instead of forcing it to 1. */
+	sched_clock_stable = stable;
+
+	perf_event_release_kernel(__sched_clock_cycles);
+
+	return 0;
+}
+
+late_initcall(sched_clock_bench);

Attachment: pgpMqbpRUxp3X.pgp
Description: PGP signature