[RFC PATCH 3/3] Introduce task's 'off cpu' time

From: Prakash Sangappa
Date: Fri Aug 27 2021 - 19:34:51 EST


Add a task's 'off cpu' time, in nanoseconds, to sched_info. It represents
the accumulated time spent either on a run queue or blocked in the kernel.
Publish the off cpu time through the shared structure. This will be used
by an application to determine the cpu time consumed (time spent executing
on a cpu) as accurately as possible, by taking the elapsed time and
subtracting the off cpu time.

Signed-off-by: Prakash Sangappa <prakash.sangappa@xxxxxxxxxx>
---
include/linux/sched.h | 6 +++++
include/linux/task_shared.h | 2 ++
include/uapi/linux/task_shared.h | 1 +
kernel/sched/stats.h | 56 ++++++++++++++++++++++++++--------------
4 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 237aa21..a63e447 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -311,6 +311,12 @@ struct sched_info {
/* When were we last queued to run? */
unsigned long long last_queued;

+ /* When did we last leave cpu */
+ unsigned long long last_depart;
+
+ /* Time spent off cpu */
+ unsigned long long off_cpu;
+
#endif /* CONFIG_SCHED_INFO */
};

diff --git a/include/linux/task_shared.h b/include/linux/task_shared.h
index 62793e4..ce475c4 100644
--- a/include/linux/task_shared.h
+++ b/include/linux/task_shared.h
@@ -70,6 +70,8 @@ struct task_ushrd_struct {
if (shrdp != NULL && shrdp->kaddr != NULL) { \
shrdp->kaddr->ts.run_delay = \
t->sched_info.run_delay; \
+ shrdp->kaddr->ts.off_cpu = \
+ t->sched_info.off_cpu; \
if (p) { \
shrdp->kaddr->ts.pcount = \
t->sched_info.pcount; \
diff --git a/include/uapi/linux/task_shared.h b/include/uapi/linux/task_shared.h
index 06a8522..c867c09 100644
--- a/include/uapi/linux/task_shared.h
+++ b/include/uapi/linux/task_shared.h
@@ -18,5 +18,6 @@ struct task_schedstat {
volatile u64 sum_exec_runtime;
volatile u64 run_delay;
volatile u64 pcount;
+ volatile u64 off_cpu;
};
#endif
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 6b2d69c..ee59994 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -159,17 +159,24 @@ static inline void psi_sched_switch(struct task_struct *prev,
*/
static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
{
- unsigned long long delta = 0;
+ unsigned long long now = rq_clock(rq), delta = 0, ddelta = 0;

- if (!t->sched_info.last_queued)
- return;
+ if (t->sched_info.last_queued) {
+ delta = now - t->sched_info.last_queued;
+ t->sched_info.last_queued = 0;
+ t->sched_info.run_delay += delta;
+
+ rq_sched_info_dequeue(rq, delta);
+ }

- delta = rq_clock(rq) - t->sched_info.last_queued;
- t->sched_info.last_queued = 0;
- t->sched_info.run_delay += delta;
- task_update_runq_stat(t, 0);
+ if (t->sched_info.last_depart) {
+ ddelta = now - t->sched_info.last_depart;
+ t->sched_info.last_depart = 0;
+ t->sched_info.off_cpu += ddelta;
+ }

- rq_sched_info_dequeue(rq, delta);
+ if (delta || ddelta)
+ task_update_runq_stat(t, 0);
}

/*
@@ -179,20 +186,27 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
*/
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
{
- unsigned long long now, delta = 0;
+ unsigned long long now = rq_clock(rq), delta = 0, ddelta = 0;

- if (!t->sched_info.last_queued)
- return;
+ if (t->sched_info.last_queued) {
+ delta = now - t->sched_info.last_queued;
+ t->sched_info.last_queued = 0;
+ t->sched_info.run_delay += delta;
+ t->sched_info.last_arrival = now;
+ t->sched_info.pcount++;
+
+ rq_sched_info_arrive(rq, delta);
+ }
+
+ if (t->sched_info.last_depart) {
+ ddelta = now - t->sched_info.last_depart;
+ t->sched_info.last_depart = 0;
+ t->sched_info.off_cpu += ddelta;
+ }

- now = rq_clock(rq);
- delta = now - t->sched_info.last_queued;
- t->sched_info.last_queued = 0;
- t->sched_info.run_delay += delta;
- t->sched_info.last_arrival = now;
- t->sched_info.pcount++;
- task_update_runq_stat(t, 1);
+ if (delta || ddelta)
+ task_update_runq_stat(t, 1);

- rq_sched_info_arrive(rq, delta);
}

/*
@@ -216,10 +230,12 @@ static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
*/
static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
{
- unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
+ unsigned long long delta, now = rq_clock(rq);

+ delta = now - t->sched_info.last_arrival;
rq_sched_info_depart(rq, delta);

+ t->sched_info.last_depart = now;
if (task_is_running(t))
sched_info_enqueue(rq, t);
}
--
2.7.4