[PATCH v4 1/3] perf/core: store context switch out type into Perf trace
From: Alexey Budankov
Date: Thu Mar 29 2018 - 11:02:58 EST
Store preempting context switch out event into Perf trace as a part of
PERF_RECORD_SWITCH[_CPU_WIDE] record.
Percentage of preempting and non-preempting context switches help
understanding the nature of workloads (CPU or IO bound) that are running
on a machine;
The event is treated as preemption one when task->state value of the
thread being switched out is TASK_RUNNING. Event type encoding is
implemented using PERF_RECORD_MISC_SWITCH_OUT_PREEMPT bit;
Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
---
include/uapi/linux/perf_event.h | 4 ++++
kernel/events/core.c | 4 ++++
tools/include/uapi/linux/perf_event.h | 4 ++++
3 files changed, 12 insertions(+)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 912b85b52344..cd6ad7e13824 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -655,6 +655,10 @@ struct perf_event_mmap_page {
* perf_event_attr::precise_ip.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+/*
+ * Indicates that thread was preempted in TASK_RUNNING state
+ */
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7517b4fb3ef4..6b760e785116 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7584,6 +7584,10 @@ static void perf_event_switch(struct task_struct *task,
},
};
+ if (!sched_in && task->state == TASK_RUNNING)
+ switch_event.event_id.header.misc |=
+ PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
+
perf_iterate_sb(perf_event_switch_output,
&switch_event,
NULL);
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 912b85b52344..cd6ad7e13824 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -655,6 +655,10 @@ struct perf_event_mmap_page {
* perf_event_attr::precise_ip.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+/*
+ * Indicates that thread was preempted in TASK_RUNNING state
+ */
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/