[tip:sched/core] sched/tracing: Add a new tracepoint for sleeptime

From: tip-bot for Arun Sharma
Date: Sat Dec 24 2011 - 02:32:54 EST


Commit-ID: 1ac9bc6943edf7d181b4b1cc734981350d4f6bae
Gitweb: http://git.kernel.org/tip/1ac9bc6943edf7d181b4b1cc734981350d4f6bae
Author: Arun Sharma <asharma@xxxxxx>
AuthorDate: Wed, 21 Dec 2011 16:15:40 -0800
Committer: Ingo Molnar <mingo@xxxxxxx>
CommitDate: Fri, 23 Dec 2011 17:56:17 +0100

sched/tracing: Add a new tracepoint for sleeptime

If CONFIG_SCHEDSTATS is defined, the kernel maintains
information about how long the task was sleeping or
in the case of iowait, blocking in the kernel before
getting woken up.

This will be useful for sleep time profiling.

Note: this information is only provided for sched_fair.
Other scheduling classes may choose to provide this in
the future.

Note: the delay includes the time spent on the runqueue
as well.

Signed-off-by: Arun Sharma <asharma@xxxxxx>
Acked-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxxxxx>
Cc: Andrew Vagin <avagin@xxxxxxxxxx>
Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Link: http://lkml.kernel.org/r/1324512940-32060-2-git-send-email-asharma@xxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++++++++++++++++
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 2 -
3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e33ed1b..6ba596b 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -370,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);

+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+ u64 block, sleep;
+
+ block = tsk->se.statistics.block_start;
+ sleep = tsk->se.statistics.sleep_start;
+ tsk->se.statistics.block_start = 0;
+ tsk->se.statistics.sleep_start = 0;
+
+ return block ? block : sleep ? sleep : 0;
+#else
+ return 0;
+#endif
+}
+#endif
+
+/*
+ * Tracepoint for accounting sleeptime (time the task is sleeping
+ * or waiting for I/O).
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+ TP_PROTO(struct task_struct *tsk, u64 now),
+
+ TP_ARGS(tsk, now),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( u64, sleeptime )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->sleeptime = trace_get_sleeptime(tsk);
+ __entry->sleeptime = __entry->sleeptime ?
+ now - __entry->sleeptime : 0;
+ )
+ TP_perf_assign(
+ __perf_count(__entry->sleeptime);
+ ),
+
+ TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+ __entry->comm, __entry->pid,
+ (unsigned long long)__entry->sleeptime)
+);
+
/*
* Tracepoint for showing priority inheritance modifying a tasks
* priority.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8ffe523..4dbfd04 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1937,6 +1937,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
+ trace_sched_stat_sleeptime(current, rq->clock);

fire_sched_in_preempt_notifiers(current);
if (mm)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bdf1883..8e42de9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1003,7 +1003,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.sleep_max))
se->statistics.sleep_max = delta;

- se->statistics.sleep_start = 0;
se->statistics.sum_sleep_runtime += delta;

if (tsk) {
@@ -1020,7 +1019,6 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (unlikely(delta > se->statistics.block_max))
se->statistics.block_max = delta;

- se->statistics.block_start = 0;
se->statistics.sum_sleep_runtime += delta;

if (tsk) {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/