[PATCH 2/2] sched: fix schedstats for idle kthreads

From: Jian Wen
Date: Tue Mar 08 2022 - 01:51:50 EST


Idle kthreads should be accounted as sleeping rather than blocked,
since they are not blocked at all.
This patch makes idle kthreads trigger the sched:sched_stat_sleep
event instead of sched:sched_stat_blocked.

Testing:
A kernel module[1] is used to trigger the sched:sched_stat_blocked and
sched:sched_stat_sleep events.

$ sudo sysctl -w kernel.sched_schedstats=1
kernel.sched_schedstats = 1

IDLE task now triggers the sched_stat_sleep event instead of
sched_stat_blocked:
$ sudo perf record -e sched:sched_stat_blocked -e sched:sched_stat_sleep insmod ./delay.ko
$ sudo perf script
...
swapper 0 [000] 546.194819: sched:sched_stat_sleep: comm=insmod pid=1073 delay=4039079114 [ns]
...

UNINTERRUPTIBLE task still triggers sched:sched_stat_blocked:
$ sudo perf record -e sched:sched_stat_blocked -e sched:sched_stat_sleep insmod ./delay.ko block=1
$ sudo perf script
...
swapper 0 [000] 473.874658: sched:sched_stat_blocked: comm=insmod pid=1062 delay=4044046956 [ns]
...

[1]
/* Non-zero: sleep in TASK_UNINTERRUPTIBLE; zero (default): sleep in TASK_IDLE. */
static int block;
module_param(block, int, 0660);

/*
 * Module init: put the loading task (insmod) to sleep for 4*HZ jiffies in
 * the state selected by the "block" parameter, so that waking it up fires
 * either the sched_stat_blocked or the sched_stat_sleep event.
 */
static int __init init_delay(void)
{
	set_current_state(block ? TASK_UNINTERRUPTIBLE : TASK_IDLE);
	schedule_timeout(4 * HZ);

	return 0;
}

Signed-off-by: Jian Wen <wenjian1@xxxxxxxxxx>
---
kernel/sched/deadline.c | 2 +-
kernel/sched/fair.c | 2 +-
kernel/sched/rt.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e6fe3b46432a..f7e60c5983e8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1540,7 +1540,7 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
unsigned int state;

state = READ_ONCE(p->__state);
- if (state & TASK_INTERRUPTIBLE)
+ if (state & TASK_INTERRUPTIBLE || state == TASK_IDLE)
__schedstat_set(p->stats.sleep_start,
rq_clock(rq_of_dl_rq(dl_rq)));
else if (state & TASK_UNINTERRUPTIBLE)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fcfb22c835e4..f1eb0cf8dea3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -986,7 +986,7 @@ update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int fl

/* XXX racy against TTWU */
state = READ_ONCE(tsk->__state);
- if (state & TASK_INTERRUPTIBLE)
+ if (state & TASK_INTERRUPTIBLE || state == TASK_IDLE)
__schedstat_set(tsk->stats.sleep_start,
rq_clock(rq_of(cfs_rq)));
else if (state & TASK_UNINTERRUPTIBLE)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5c4160f8cb23..f3cc5618ec57 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1384,7 +1384,7 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
unsigned int state;

state = READ_ONCE(p->__state);
- if (state & TASK_INTERRUPTIBLE)
+ if (state & TASK_INTERRUPTIBLE || state == TASK_IDLE)
__schedstat_set(p->stats.sleep_start,
rq_clock(rq_of_rt_rq(rt_rq)));
else if (state & TASK_UNINTERRUPTIBLE)
--
2.25.1