[RFC PATCH 4/5] sched/events: Introduce sched_entity load tracking trace event

From: Dietmar Eggemann
Date: Tue Mar 28 2017 - 02:36:13 EST


The trace event keys load and util (utilization) are mapped to the
following members of struct sched_avg (excerpted after the list):

(1) load : se->avg.load_avg

(2) util : se->avg.util_avg
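
For reference, the relevant part of struct sched_avg (as of the
v4.11-rc base of this series):

  struct sched_avg {
          u64             last_update_time;
          u64             load_sum;
          u32             util_sum;
          u32             period_contrib;
          unsigned long   load_avg;
          unsigned long   util_avg;
  };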

To let this trace event work for configurations w/ and w/o group
scheduling support for CFS (CONFIG_FAIR_GROUP_SCHED), the following
special handling is necessary for non-existent key=value pairs (a
condensed sketch of the underlying check follows the list):

path = "(null)" : In case of !CONFIG_FAIR_GROUP_SCHED or the
sched_entity represents a task.

id = -1 : In case of !CONFIG_FAIR_GROUP_SCHED or the
sched_entity represents a task.

comm = "(null)" : In case sched_entity represents a task_group.

pid = -1 : In case sched_entity represents a task_group.
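
All four special cases reduce to a single check: group_cfs_rq(se)
returns the cfs_rq owned by a group sched_entity and NULL if the
sched_entity represents a task, in which case the task_struct can be
recovered with container_of(). Condensed, this is what the
TP_fast_assign() below does:

  struct task_struct *p = group_cfs_rq(se) ? NULL :
                  container_of(se, struct task_struct, se);

  if (p) {
          /* task: comm/pid are valid; path/id read "(null)"/-1 */
  } else {
          /* task_group: path/id are valid; comm/pid read "(null)"/-1 */
  }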

The following list shows examples of the key=value pairs for
different configurations; a minimal reader to capture such lines is
sketched after the examples:

(1) a task:

cpu=0 path=(null) id=-1 comm=sshd pid=2206 load=102 util=102

(2) a task group:

cpu=1 path=/tg1/tg11/tg111 id=4 comm=(null) pid=-1 load=882 util=510

(3) an autogroup:

cpu=0 path=/autogroup-13 id=0 comm=(null) pid=-1 load=49 util=48

(4) w/o CONFIG_FAIR_GROUP_SCHED:

cpu=0 path=(null) id=-1 comm=sshd pid=2211 load=301 util=265
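
Once the patch is applied, such lines can be captured through the
usual ftrace interface, e.g. with a minimal reader like the one below
(assuming debugfs is mounted at /sys/kernel/debug):

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/kernel/debug/tracing/events/sched/"
                          "sched_load_se/enable", "w");
          char line[512];

          if (!f)
                  return 1;
          fputs("1", f);  /* enable the new trace event */
          fclose(f);

          /* stream the formatted events as they are emitted */
          f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
          if (!f)
                  return 1;
          while (fgets(line, sizeof(line), f))
                  fputs(line, stdout);
          fclose(f);
          return 0;
  }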

The trace event is only defined for CONFIG_SMP.

The helper functions __trace_sched_cpu(), __trace_sched_path() and
__trace_sched_id() are extended to deal with sched_entities as well.
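
The convention is uniform: a NULL cfs_rq selects the per-task
fallback (the se for cpu, "(null)" for path, -1 for id), so both
trace events share the same helpers:

  /* cfs_rq event: no sched_entity available */
  __entry->cpu = __trace_sched_cpu(cfs_rq, NULL);

  /* se event: group_cfs_rq(se) is NULL iff se represents a task */
  __entry->cpu = __trace_sched_cpu(group_cfs_rq(se), se);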

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
 include/trace/events/sched.h | 63 +++++++++++++++++++++++++++++++++++---------
 kernel/sched/fair.c          |  3 +++
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 51db8a90e45f..647cfaf528fd 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -566,14 +566,15 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
 #ifdef CONFIG_SMP
 #ifdef CREATE_TRACE_POINTS
 static inline
-int __trace_sched_cpu(struct cfs_rq *cfs_rq)
+int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	struct rq *rq = cfs_rq->rq;
+	struct rq *rq = cfs_rq ? cfs_rq->rq : NULL;
 #else
-	struct rq *rq = container_of(cfs_rq, struct rq, cfs);
+	struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL;
 #endif
-	return cpu_of(rq);
+	return rq ? cpu_of(rq) :
+		    task_cpu(container_of(se, struct task_struct, se));
 }
 
 static inline
@@ -582,25 +583,24 @@ int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	int l = path ? len : 0;
 
-	if (task_group_is_autogroup(cfs_rq->tg))
+	if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
 		return autogroup_path(cfs_rq->tg, path, l) + 1;
-	else
+	else if (cfs_rq && cfs_rq->tg->css.cgroup)
 		return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1;
-#else
+#endif
 	if (path)
 		strcpy(path, "(null)");
 
 	return strlen("(null)");
-#endif
 }
 
 static inline int __trace_sched_id(struct cfs_rq *cfs_rq)
 {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	return cfs_rq->tg->css.id;
-#else
-	return -1;
+	if (cfs_rq)
+		return cfs_rq->tg->css.id;
 #endif
+	return -1;
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -623,7 +623,7 @@ TRACE_EVENT(sched_load_cfs_rq,
 	),
 
 	TP_fast_assign(
-		__entry->cpu = __trace_sched_cpu(cfs_rq);
+		__entry->cpu = __trace_sched_cpu(cfs_rq, NULL);
 		__trace_sched_path(cfs_rq, __get_dynamic_array(path),
 				   __get_dynamic_array_len(path));
 		__entry->id = __trace_sched_id(cfs_rq);
@@ -634,6 +634,45 @@ TRACE_EVENT(sched_load_cfs_rq,
 	TP_printk("cpu=%d path=%s id=%d load=%lu util=%lu", __entry->cpu,
 		  __get_str(path), __entry->id, __entry->load, __entry->util)
 );
+
+/*
+ * Tracepoint for sched_entity load tracking:
+ */
+TRACE_EVENT(sched_load_se,
+
+	TP_PROTO(struct sched_entity *se),
+
+	TP_ARGS(se),
+
+	TP_STRUCT__entry(
+		__field(	int,		cpu			)
+		__dynamic_array(char,		path,
+			__trace_sched_path(group_cfs_rq(se), NULL, 0)	)
+		__field(	int,		id			)
+		__array(	char,		comm,	TASK_COMM_LEN	)
+		__field(	pid_t,		pid			)
+		__field(	unsigned long,	load			)
+		__field(	unsigned long,	util			)
+	),
+
+	TP_fast_assign(
+		struct task_struct *p = group_cfs_rq(se) ? NULL :
+				container_of(se, struct task_struct, se);
+
+		__entry->cpu = __trace_sched_cpu(group_cfs_rq(se), se);
+		__trace_sched_path(group_cfs_rq(se), __get_dynamic_array(path),
+				   __get_dynamic_array_len(path));
+		__entry->id = __trace_sched_id(group_cfs_rq(se));
+		strncpy(__entry->comm, p ? p->comm : "(null)", TASK_COMM_LEN);
+		__entry->pid = p ? p->pid : -1;
+		__entry->load = se->avg.load_avg;
+		__entry->util = se->avg.util_avg;
+	),
+
+	TP_printk("cpu=%d path=%s id=%d comm=%s pid=%d load=%lu util=%lu",
+		  __entry->cpu, __get_str(path), __entry->id, __entry->comm,
+		  __entry->pid, __entry->load, __entry->util)
+);
 #endif /* CONFIG_SMP */
 #endif /* _TRACE_SCHED_H */

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 04d4f81b96ae..d1dcb19f5b55 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2940,6 +2940,8 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 
 	if (cfs_rq)
 		trace_sched_load_cfs_rq(cfs_rq);
+	else
+		trace_sched_load_se(container_of(sa, struct sched_entity, avg));
 
 	return decayed;
 }
@@ -3162,6 +3164,7 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
 	update_tg_cfs_load(cfs_rq, se);
 
 	trace_sched_load_cfs_rq(cfs_rq);
+	trace_sched_load_se(se);
 
 	return 1;
 }
--
2.11.0