[PATCH 13/15] sched,fair: propagate sum_exec_runtime up the hierarchy
From: Rik van Riel
Date: Wed Aug 21 2019 - 22:18:36 EST
Now that enqueue_task_fair and dequeue_task_fair no longer iterate up
the hierarchy all the time, a method to lazily propagate sum_exec_runtime
up the hierarchy is necessary.
Once a tick, propagate the newly accumulated exec_runtime up the hierarchy,
and feed it into CFS bandwidth control.
Remove the pointless call to account_cfs_rq_runtime from update_curr,
which is always called with a root cfs_rq.
Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
---
include/linux/sched.h | 1 +
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 22 ++++++++++++++++++++--
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 901c710363e7..bdca15b3afe7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,6 +454,7 @@ struct sched_entity {
int depth;
unsigned long enqueued_h_load;
unsigned long enqueued_h_weight;
+ u64 propagated_exec_runtime;
struct load_weight h_load;
struct sched_entity *parent;
/* rq on which this entity is (to be) queued: */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fbd96900f715..9915d20e84a9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2137,6 +2137,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.propagated_exec_runtime = 0;
p->se.cfs_rq = NULL;
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5cfa3dbeba49..d6c881c5c4d5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -898,8 +898,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cgroup_account_cputime(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
-
- account_cfs_rq_runtime(cfs_rq, delta_exec);
}
static void update_curr_fair(struct rq *rq)
@@ -3412,6 +3410,20 @@ static inline bool skip_blocked_update(struct sched_entity *se)
return true;
}
+/*
+ * Push the execution time that @se has accumulated since the previous call
+ * one level up the hierarchy.
+ *
+ * The not-yet-propagated amount is the difference between
+ * se->sum_exec_runtime and se->propagated_exec_runtime.  When @se has a
+ * parent entity, that amount is added to the parent's sum_exec_runtime and
+ * charged to @cfs_rq through account_cfs_rq_runtime().  An entity without a
+ * parent only has its propagated_exec_runtime mark advanced.
+ *
+ * In both cases se->propagated_exec_runtime ends up equal to
+ * se->sum_exec_runtime, so the same runtime is not handed up twice.
+ */
+static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
+ struct sched_entity *se)
+{
+ struct sched_entity *parent = se->parent;
+ u64 diff = se->sum_exec_runtime - se->propagated_exec_runtime;
+
+ if (parent) {
+ parent->sum_exec_runtime += diff;
+ account_cfs_rq_runtime(cfs_rq, diff);
+ }
+
+ se->propagated_exec_runtime = se->sum_exec_runtime;
+}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
@@ -3423,6 +3435,11 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) {}
+/* No-op stub used when CONFIG_FAIR_GROUP_SCHED is not set. */
+static inline void propagate_exec_runtime(struct cfs_rq *cfs_rq,
+ struct sched_entity *se)
+{
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
/**
@@ -10157,6 +10174,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se, int flags)
if (!(flags & DO_ATTACH))
break;
+ propagate_exec_runtime(cfs_rq, se);
update_cfs_group(se);
}
}
--
2.20.1