[PATCH] sched: Enabled schedstat when schedstat tracepoints are enabled
From: Steven Rostedt
Date: Wed Apr 12 2017 - 16:04:43 EST
From: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
During my tests, I see this in my dmesg:
"Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and
stat_runtime require the kernel parameter schedstats=enabled or kernel.sched_schedstats=1"
And found the commit:
cb2517653fc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Which states:
"For tracepoints, there is a simple warning as it's not safe to activate
schedstats in the context when it's known the tracepoint may be wanted
but is unavailable."
I'm assuming that Mel did not know about the TRACE_EVENT_FN() and
DEFINE_EVENT_FN() that allow for callbacks for tracepoints as they are
enabled and disabled. I do not see any reason for not enabling
schedstat when one of its tracepoints are enabled.
The state of schedstat is saved when the first tracepoint is enabled,
and that state is put back when the tracepoints are disabled.
Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d67eee8..691ab93f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1456,6 +1456,14 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#if defined(CONFIG_TRACING) && defined(CONFIG_SCHEDSTATS)
+int schedstat_tracepoint_reg(void);
+void schedstat_tracepoint_unreg(void);
+#else
+static inline int schedstat_tracepoint_reg(void) { return 0; }
+static inline void schedstat_tracepoint_unreg(void) { }
+#endif
+
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9e3ef6c..ab8fa69 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -346,32 +346,36 @@ DECLARE_EVENT_CLASS(sched_stat_template,
* Tracepoint for accounting wait time (time the task is runnable
* but not actually running due to scheduler contention).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_wait,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting sleep time (time the task is not runnable,
* including iowait, see below).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_sleep,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting iowait time (time the task is not runnable
* due to waiting on IO to complete).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_iowait,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting blocked time (time the task is in uninterruptible).
*/
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+DEFINE_EVENT_FN(sched_stat_template, sched_stat_blocked,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay));
+ TP_ARGS(tsk, delay), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for accounting runtime (time the task is executing
@@ -403,9 +407,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);
-DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
- TP_ARGS(tsk, runtime, vruntime));
+ TP_ARGS(tsk, runtime, vruntime), schedstat_tracepoint_reg,
+ schedstat_tracepoint_unreg);
/*
* Tracepoint for showing priority inheritance modifying a tasks
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc0..95e9ce9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2266,6 +2266,8 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
DEFINE_STATIC_KEY_FALSE(sched_schedstats);
static bool __initdata __sched_schedstats = false;
+static int schedstat_tracepoint_ref;
+static bool schedstat_save_state;
static void set_schedstats(bool enabled)
{
@@ -2275,6 +2277,32 @@ static void set_schedstats(bool enabled)
static_branch_disable(&sched_schedstats);
}
+/*
+ * schedstat_tracepoint_reg() and unreg() are called by the tracepoint
+ * regfunc/unregfunc functions. They are protected by the tracepoint mutex.
+ * See kernel/tracepoint.c:tracepoint_add_func().
+ *
+ * The modifications to schedstat_tracepoint_ref and schedstat_save_state
+ * are only done under that mutex, and do not need further protection.
+ */
+int schedstat_tracepoint_reg(void)
+{
+ if (!schedstat_tracepoint_ref) {
+ schedstat_save_state = schedstat_enabled();
+ if (!schedstat_save_state)
+ set_schedstats(true);
+ }
+ schedstat_tracepoint_ref++;
+}
+
+void schedstat_tracepoint_unreg(void)
+{
+ schedstat_tracepoint_ref--;
+ if (schedstat_tracepoint_ref || schedstat_save_state)
+ return;
+ set_schedstats(false);
+}
+
void force_schedstat_enabled(void)
{
if (!schedstat_enabled()) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dea1389..d6665f6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3526,27 +3526,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
- if (schedstat_enabled())
- return;
-
- /* Force schedstat enabled if a dependent tracepoint is active */
- if (trace_sched_stat_wait_enabled() ||
- trace_sched_stat_sleep_enabled() ||
- trace_sched_stat_iowait_enabled() ||
- trace_sched_stat_blocked_enabled() ||
- trace_sched_stat_runtime_enabled()) {
- printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
- "stat_blocked and stat_runtime require the "
- "kernel parameter schedstats=enabled or "
- "kernel.sched_schedstats=1\n");
- }
-#endif
-}
-
-
/*
* MIGRATION
*
@@ -3617,7 +3596,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
- check_schedstat_required();
update_stats_enqueue(cfs_rq, se, flags);
check_spread(cfs_rq, se);
if (!curr)