[RFC PATCH v2 1/6] sched: Unify runtime accounting across classes

From: Juri Lelli
Date: Fri Aug 07 2020 - 05:51:36 EST


From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

All classes use sched_entity::exec_start to track runtime and have
copies of the exact same code around to compute runtime.

Collapse all that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/sched.h | 2 +-
kernel/sched/deadline.c | 17 +++-----------
kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++---------
kernel/sched/rt.c | 17 +++-----------
kernel/sched/sched.h | 2 ++
kernel/sched/stop_task.c | 16 +------------
6 files changed, 49 insertions(+), 55 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a6bf77c346876..f7b9ba04970bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -424,7 +424,7 @@ struct sched_statistics {

u64 block_start;
u64 block_max;
- u64 exec_max;
+ s64 exec_max;
u64 slice_max;

u64 nr_migrations_cold;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3862a28cd05d0..2ece83b5991f5 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1221,9 +1221,8 @@ static void update_curr_dl(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_dl_entity *dl_se = &curr->dl;
- u64 delta_exec, scaled_delta_exec;
+ s64 delta_exec, scaled_delta_exec;
int cpu = cpu_of(rq);
- u64 now;

if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
@@ -1236,23 +1235,13 @@ static void update_curr_dl(struct rq *rq)
* natural solution, but the full ramifications of this
* approach need further study.
*/
- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0)) {
+ delta_exec = update_curr_common(rq);
+ if (unlikely(delta_exec <= 0)) {
if (unlikely(dl_se->dl_yielded))
goto throttle;
return;
}

- schedstat_set(curr->se.statistics.exec_max,
- max(curr->se.statistics.exec_max, delta_exec));
-
- curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
- curr->se.exec_start = now;
- cgroup_account_cputime(curr, delta_exec);
-
if (dl_entity_is_special(dl_se))
return;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9a..10a230d85104a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -836,30 +836,58 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
}
#endif /* CONFIG_SMP */

+static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+{
+ u64 now = rq_clock_task(rq);
+ s64 delta_exec;
+
+ delta_exec = now - curr->exec_start;
+ if (unlikely(delta_exec <= 0))
+ return delta_exec;
+
+ curr->exec_start = now;
+ curr->sum_exec_runtime += delta_exec;
+
+ schedstat_set(curr->statistics.exec_max,
+ max(delta_exec, curr->statistics.exec_max));
+
+ return delta_exec;
+}
+
+/*
+ * Used by other classes to account runtime.
+ */
+s64 update_curr_common(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ s64 delta_exec;
+
+ delta_exec = update_curr_se(rq, &curr->se);
+ if (unlikely(delta_exec <= 0))
+ return delta_exec;
+
+ account_group_exec_runtime(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
+
+ return delta_exec;
+}
+
/*
* Update the current task's runtime statistics.
*/
static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- u64 now = rq_clock_task(rq_of(cfs_rq));
- u64 delta_exec;
+ s64 delta_exec;

if (unlikely(!curr))
return;

- delta_exec = now - curr->exec_start;
- if (unlikely((s64)delta_exec <= 0))
+ delta_exec = update_curr_se(rq_of(cfs_rq), curr);
+ if (unlikely(delta_exec <= 0))
return;

- curr->exec_start = now;
-
- schedstat_set(curr->statistics.exec_max,
- max(delta_exec, curr->statistics.exec_max));
-
- curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq->exec_clock, delta_exec);
-
curr->vruntime += calc_delta_fair(delta_exec, curr);
update_min_vruntime(cfs_rq);

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f215eea6a9661..196171fbf5978 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -997,26 +997,15 @@ static void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
- u64 delta_exec;
- u64 now;
+ s64 delta_exec;

if (curr->sched_class != &rt_sched_class)
return;

- now = rq_clock_task(rq);
- delta_exec = now - curr->se.exec_start;
- if (unlikely((s64)delta_exec <= 0))
+ delta_exec = update_curr_common(rq);
+ if (unlikely(delta_exec < 0))
return;

- schedstat_set(curr->se.statistics.exec_max,
- max(curr->se.statistics.exec_max, delta_exec));
-
- curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
- curr->se.exec_start = now;
- cgroup_account_cputime(curr, delta_exec);
-
if (!rt_bandwidth_enabled())
return;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3fd283892761d..963c16fc27500 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1769,6 +1769,8 @@ extern const u32 sched_prio_to_wmult[40];

#define RETRY_TASK ((void *)-1UL)

+extern s64 update_curr_common(struct rq *rq);
+
struct sched_class {

#ifdef CONFIG_UCLAMP_TASK
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 394bc8126a1e5..1eb1e336e18e7 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -62,21 +62,7 @@ static void yield_task_stop(struct rq *rq)

static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
- struct task_struct *curr = rq->curr;
- u64 delta_exec;
-
- delta_exec = rq_clock_task(rq) - curr->se.exec_start;
- if (unlikely((s64)delta_exec < 0))
- delta_exec = 0;
-
- schedstat_set(curr->se.statistics.exec_max,
- max(curr->se.statistics.exec_max, delta_exec));
-
- curr->se.sum_exec_runtime += delta_exec;
- account_group_exec_runtime(curr, delta_exec);
-
- curr->se.exec_start = rq_clock_task(rq);
- cgroup_account_cputime(curr, delta_exec);
+ update_curr_common(rq);
}

/*
--
2.26.2