[PATCH v5 1/3] perf/core: store context switch out type into Perf trace

From: Alexey Budankov
Date: Fri Mar 30 2018 - 01:00:22 EST



Store preempting context switch out event into Perf trace as a part of
PERF_RECORD_SWITCH[_CPU_WIDE] record.

Percentage of preempting and non-preempting context switches help
understanding the nature of workloads (CPU or IO bound) that are running
on a machine;

The event is treated as preemption one when task->state value of the
thread being switched out is TASK_RUNNING. Event type encoding is
implemented using PERF_RECORD_MISC_SWITCH_OUT_PREEMPT bit;

Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
---
include/uapi/linux/perf_event.h | 18 +++++++++++++++---
kernel/events/core.c | 4 ++++
tools/include/uapi/linux/perf_event.h | 18 +++++++++++++++---
3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 912b85b52344..b8e288a1f740 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -650,11 +650,23 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ * Indicates that thread was preempted in TASK_RUNNING state.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc1c330c6bd6..872a5aaa77eb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7584,6 +7584,10 @@ static void perf_event_switch(struct task_struct *task,
},
};

+ if (!sched_in && task->state == TASK_RUNNING)
+ switch_event.event_id.header.misc |=
+ PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
+
perf_iterate_sb(perf_event_switch_output,
&switch_event,
NULL);
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 912b85b52344..b8e288a1f740 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -650,11 +650,23 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ * Indicates that thread was preempted in TASK_RUNNING state.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/