[PATCH V2 01/13] perf/core: Introduce PERF_RECORD_OVERHEAD

From: kan . liang
Date: Fri Dec 02 2016 - 16:20:48 EST


From: Kan Liang <kan.liang@xxxxxxxxx>

A new perf record type, PERF_RECORD_OVERHEAD, is introduced to export perf
overhead information to userspace, so that the user can measure the
overhead of sampling directly. If the user doesn't want this feature, it
can be switched off by configuring the user space tool.

The overhead information is output through the existing event log
mechanism. Note, however, that it describes the system overhead, not
per-event overhead.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
---
include/linux/perf_event.h | 9 ++++++++
include/uapi/linux/perf_event.h | 39 +++++++++++++++++++++++++++++++++-
kernel/events/core.c | 46 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecd..5bc8156 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -998,6 +998,10 @@ perf_event__output_id_sample(struct perf_event *event,
extern void
perf_log_lost_samples(struct perf_event *event, u64 lost);

+extern void
+perf_log_overhead(struct perf_event *event, u64 type,
+ struct perf_overhead_entry *entry);
+
static inline bool is_sampling_event(struct perf_event *event)
{
return event->attr.sample_period != 0;
@@ -1221,6 +1225,11 @@ static inline bool has_addr_filter(struct perf_event *event)
return event->pmu->nr_addr_filters;
}

+static inline bool needs_log_overhead(struct perf_event *event)
+{
+ return !!event->attr.overhead; /* true iff the attr.overhead bit is set */
+}
+
/*
* An inherited event uses parent's filters
*/
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..bb0ecf0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
- __reserved_1 : 36;
+ overhead : 1, /* Log overhead information */
+ __reserved_1 : 35;

union {
__u32 wakeup_events; /* wakeup every n events */
@@ -862,6 +863,17 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,

+ /*
+ * Records perf overhead
+ * struct {
+ * struct perf_event_header header;
+ * u64 type;
+ * struct perf_overhead_entry entry;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_OVERHEAD = 16,
+
PERF_RECORD_MAX, /* non-ABI */
};

@@ -980,4 +992,29 @@ struct perf_branch_entry {
reserved:44;
};

+/*
+ * Overhead types may differ between architectures.
+ * Common (core) overhead types are defined starting at PERF_CORE_OVERHEAD.
+ * Arch-specific types should be defined starting at PERF_PMU_OVERHEAD.
+ */
+enum perf_record_overhead_type {
+ PERF_CORE_OVERHEAD = 0,
+
+ PERF_PMU_OVERHEAD = 20,
+
+ PERF_OVERHEAD_MAX,
+};
+
+/*
+ * single overhead record layout:
+ *
+ * nr: Number of times the overhead occurred.
+ * E.g. for NMI, nr == number of times the NMI handler was called.
+ * time: Total overhead cost (ns)
+ */
+struct perf_overhead_entry {
+ __u64 nr;
+ __u64 time;
+};
+
#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6ee1feb..5312744 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7037,6 +7037,52 @@ static void perf_log_itrace_start(struct perf_event *event)
perf_output_end(&handle);
}

+
+/*
+ * Record overhead information
+ *
+ * The overhead logged here is the system overhead, not per-event overhead.
+ * This function only takes advantage of the existing event log mechanism
+ * to log the overhead information. Nothing is written if
+ * perf_output_begin() fails; on success *entry is zeroed afterwards.
+ */
+void perf_log_overhead(struct perf_event *event, u64 type,
+ struct perf_overhead_entry *entry)
+{
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ struct {
+ struct perf_event_header header;
+ u64 type;
+ struct perf_overhead_entry overhead;
+ } overhead_event = {
+ .header = {
+ .type = PERF_RECORD_OVERHEAD,
+ .misc = 0,
+ .size = sizeof(overhead_event),
+ },
+ .type = type,
+ .overhead = {
+ .nr = entry->nr,
+ .time = entry->time,
+ },
+ };
+
+ perf_event_header__init_id(&overhead_event.header, &sample, event);
+ ret = perf_output_begin(&handle, event, overhead_event.header.size);
+
+ if (ret)
+ return;
+
+ perf_output_put(&handle, overhead_event);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+ memset(entry, 0, sizeof(*entry));
+}
+
/*
* Generic event overflow handling, sampling.
*/
--
2.5.5