[RESEND RFC PATCH 2/3] Publish task's scheduler stats through the shared structure

From: Prakash Sangappa
Date: Wed Sep 08 2021 - 20:16:30 EST


Define a 'struct task_schedstat' which contains members corresponding to the
scheduler stats that are currently available through
/proc/<pid>/task/<tid>/schedstat. Update the stats in this structure in the
kernel at the same time the corresponding fields in 'struct task_struct' are
updated. Add a TASK_SCHEDSTAT option to the task_getshared() system call to
request these per-thread scheduler stats through the shared structure.

Signed-off-by: Prakash Sangappa <prakash.sangappa@xxxxxxxxxx>
---
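For illustration only (not part of the patch): a minimal userspace sketch of
how a thread might request the TASK_SCHEDSTAT shared structure and read the
stats directly from the shared mapping. The syscall number
__NR_task_getshared is a placeholder (the syscall itself is presumably added
by an earlier patch in this series), and the sketch assumes the mapped
structure starts with struct task_schedstat, matching the 'ts' member layout
in include/linux/task_shared.h below.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/task_shared.h>	/* TASK_SCHEDSTAT, struct task_schedstat */

#ifndef __NR_task_getshared
#define __NR_task_getshared	-1	/* placeholder; provided elsewhere in the series */
#endif

int main(void)
{
	struct task_schedstat *ts;

	/* opt = TASK_SCHEDSTAT, flags = 0; kernel writes back the mapped pointer */
	if (syscall(__NR_task_getshared, TASK_SCHEDSTAT, 0UL, &ts) != 0) {
		perror("task_getshared");
		return 1;
	}

	/* Plain loads from the shared page; no /proc parsing, no further syscalls */
	printf("sum_exec_runtime: %llu ns\n",
	       (unsigned long long)ts->sum_exec_runtime);
	printf("run_delay:        %llu ns\n",
	       (unsigned long long)ts->run_delay);
	printf("pcount:           %llu\n",
	       (unsigned long long)ts->pcount);
	return 0;
}

The same three values are otherwise only reachable by reading and parsing
/proc/<pid>/task/<tid>/schedstat on every sample.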
include/linux/task_shared.h | 35 ++++++++++++++++++++++++++++++++++-
include/uapi/linux/task_shared.h | 23 +++++++++++++++++++++++
kernel/sched/deadline.c | 1 +
kernel/sched/fair.c | 1 +
kernel/sched/rt.c | 1 +
kernel/sched/sched.h | 1 +
kernel/sched/stats.h | 3 +++
kernel/sched/stop_task.c | 1 +
mm/task_shared.c | 13 +++++++++++++
9 files changed, 78 insertions(+), 1 deletion(-)
create mode 100644 include/uapi/linux/task_shared.h

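Building on the sketch above (same hypothetical program, same 'ts' pointer),
a small helper illustrating the intended usage pattern: the counters can be
polled cheaply and turned into per-interval figures, e.g. the average
queueing delay per run as the ratio of the run_delay and pcount deltas.

/* Illustrative helper: report scheduling delay accumulated per interval. */
static void report_sched_delay(const volatile struct task_schedstat *ts,
			       unsigned int intervals)
{
	unsigned long long prev_delay = ts->run_delay;
	unsigned long long prev_count = ts->pcount;

	while (intervals--) {
		unsigned long long d_delay, d_count;

		sleep(1);
		d_delay = ts->run_delay - prev_delay;
		d_count = ts->pcount - prev_count;

		printf("waited %llu ns over %llu runs (avg %llu ns/run)\n",
		       d_delay, d_count, d_count ? d_delay / d_count : 0);

		prev_delay += d_delay;
		prev_count += d_count;
	}
}

Since the structure is per-thread, each thread would obtain and sample its
own pointer.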
diff --git a/include/linux/task_shared.h b/include/linux/task_shared.h
index de17849..62793e4 100644
--- a/include/linux/task_shared.h
+++ b/include/linux/task_shared.h
@@ -3,6 +3,7 @@
#define __TASK_SHARED_H__

#include <linux/mm_types.h>
+#include <uapi/linux/task_shared.h>

/*
* Track user-kernel shared pages referred by mm_struct
@@ -18,7 +19,7 @@ struct ushared_pages {
* fast communication.
*/
struct task_ushared {
- long version;
+ struct task_schedstat ts;
};

/*
@@ -52,6 +53,38 @@ struct task_ushrd_struct {
struct ushared_pg *upg;
};

+
+#ifdef CONFIG_SCHED_INFO
+
+#define task_update_exec_runtime(t) \
+ do { \
+ struct task_ushrd_struct *shrdp = t->task_ushrd; \
+ if (shrdp != NULL && shrdp->kaddr != NULL) \
+ shrdp->kaddr->ts.sum_exec_runtime = \
+ t->se.sum_exec_runtime; \
+ } while (0)
+
+#define task_update_runq_stat(t, p) \
+ do { \
+ struct task_ushrd_struct *shrdp = t->task_ushrd; \
+ if (shrdp != NULL && shrdp->kaddr != NULL) { \
+ shrdp->kaddr->ts.run_delay = \
+ t->sched_info.run_delay; \
+ if (p) { \
+ shrdp->kaddr->ts.pcount = \
+ t->sched_info.pcount; \
+ } \
+ } \
+ } while (0)
+#else
+
+#define task_update_exec_runtime(t) do { } while (0)
+#define task_update_runq_stat(t, p) do { } while (0)
+
+#endif
+
+
+
extern void task_ushared_free(struct task_struct *t);
extern void mm_ushared_clear(struct mm_struct *mm);
#endif /* __TASK_SHARED_H__ */
diff --git a/include/uapi/linux/task_shared.h b/include/uapi/linux/task_shared.h
new file mode 100644
index 0000000..06a8522
--- /dev/null
+++ b/include/uapi/linux/task_shared.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef LINUX_TASK_SHARED_H
+#define LINUX_TASK_SHARED_H
+#include <linux/types.h>
+
+/*
+ * Per task user-kernel mapped structure for faster communication.
+ */
+
+/*
+ * Option to request the struct task_schedstat shared structure, in which the
+ * kernel shares the task's exec time, time spent on the run queue and the
+ * number of times it was scheduled to run on a CPU. Requires a kernel with
+ * CONFIG_SCHED_INFO enabled.
+ */
+#define TASK_SCHEDSTAT 1
+
+struct task_schedstat {
+ volatile __u64 sum_exec_runtime;
+ volatile __u64 run_delay;
+ volatile __u64 pcount;
+};
+#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index aaacd6c..189c74c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1270,6 +1270,7 @@ static void update_curr_dl(struct rq *rq)

curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
+ task_update_exec_runtime(curr);

curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c4520..cbd182b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -817,6 +817,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);

+ task_update_exec_runtime(curtask);
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cgroup_account_cputime(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3daf42a..61082fc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1014,6 +1014,7 @@ static void update_curr_rt(struct rq *rq)

curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
+ task_update_exec_runtime(curr);

curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a2..4ebbd8f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -67,6 +67,7 @@
#include <linux/syscalls.h>
#include <linux/task_work.h>
#include <linux/tsacct_kern.h>
+#include <linux/task_shared.h>

#include <asm/tlb.h>

diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index d8f8eb0..6b2d69c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */

#ifdef CONFIG_SCHEDSTATS
+#include <linux/task_shared.h>

/*
* Expects runqueue lock to be held for atomicity of update
@@ -166,6 +167,7 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
delta = rq_clock(rq) - t->sched_info.last_queued;
t->sched_info.last_queued = 0;
t->sched_info.run_delay += delta;
+ task_update_runq_stat(t, 0);

rq_sched_info_dequeue(rq, delta);
}
@@ -188,6 +190,7 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
t->sched_info.run_delay += delta;
t->sched_info.last_arrival = now;
t->sched_info.pcount++;
+ task_update_runq_stat(t, 1);

rq_sched_info_arrive(rq, delta);
}
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index f988ebe..7b9b60f 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -82,6 +82,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
max(curr->se.statistics.exec_max, delta_exec));

curr->se.sum_exec_runtime += delta_exec;
+ task_update_exec_runtime(curr);
account_group_exec_runtime(curr, delta_exec);

curr->se.exec_start = rq_clock_task(rq);
diff --git a/mm/task_shared.c b/mm/task_shared.c
index 3ec5eb6..7169ccd 100644
--- a/mm/task_shared.c
+++ b/mm/task_shared.c
@@ -275,6 +275,14 @@ static long task_getshared(u64 opt, u64 flags, void __user *uaddr)
{
struct task_ushrd_struct *ushrd = current->task_ushrd;

+ /* Currently only TASK_SCHEDSTAT supported */
+#ifdef CONFIG_SCHED_INFO
+ if (opt != TASK_SCHEDSTAT)
+ return (-EINVAL);
+#else
+ return (-EOPNOTSUPP);
+#endif
+
/* We have address, return. */
if (ushrd != NULL && ushrd->upg != NULL) {
if (copy_to_user(uaddr, &ushrd->uaddr,
@@ -286,6 +294,11 @@ static long task_getshared(u64 opt, u64 flags, void __user *uaddr)
task_ushared_alloc();
ushrd = current->task_ushrd;
if (ushrd != NULL && ushrd->upg != NULL) {
+ if (opt == TASK_SCHEDSTAT) {
+ /* init current values */
+ task_update_exec_runtime(current);
+ task_update_runq_stat(current, 1);
+ }
if (copy_to_user(uaddr, &ushrd->uaddr,
sizeof(struct task_ushared *)))
return (-EFAULT);
--
2.7.4