[PATCH v3 5/7] sched: introduce task block time in schedstats

From: Yafang Shao
Date: Tue Aug 24 2021 - 07:30:27 EST


Currently in schedstats we have sum_sleep_runtime and iowait_sum, but
there's no metric to show how long a task has spent in D state. Once a
task is in D state, it means the task is blocked in the kernel, for
example the task may be waiting for a mutex. The D state is more frequent
than iowait, and it is more critical than S state. So it is worth adding
a metric to measure it.

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Alison Chaiken <achaiken@xxxxxxxxxxx>
---
include/linux/sched.h | 2 ++
kernel/sched/debug.c | 6 ++++--
kernel/sched/stats.c | 1 +
3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 39c29eae1af9..7888ad8384ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -502,6 +502,8 @@ struct sched_statistics {

u64 block_start;
u64 block_max;
+ s64 sum_block_runtime;
+
u64 exec_max;
u64 slice_max;

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 705987aed658..5c6bc3f373f0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -560,10 +560,11 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
(long long)(p->nvcsw + p->nivcsw),
p->prio);

- SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+ SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld",
SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
SPLIT_NS(p->se.sum_exec_runtime),
- SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
+ SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
+ SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));

#ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -995,6 +996,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
u64 avg_atom, avg_per_cpu;

PN_SCHEDSTAT(sum_sleep_runtime);
+ PN_SCHEDSTAT(sum_block_runtime);
PN_SCHEDSTAT(wait_start);
PN_SCHEDSTAT(sleep_start);
PN_SCHEDSTAT(block_start);
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index b2542f4d3192..21fae41c06f5 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -82,6 +82,7 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,

__schedstat_set(stats->block_start, 0);
__schedstat_add(stats->sum_sleep_runtime, delta);
+ __schedstat_add(stats->sum_block_runtime, delta);

if (p) {
if (p->in_iowait) {
--
2.18.2