Re: [PATCH 13/15] sched,fair: propagate sum_exec_runtime up the hierarchy

From: Dietmar Eggemann
Date: Wed Aug 28 2019 - 03:51:41 EST


On 22/08/2019 04:17, Rik van Riel wrote:
> Now that enqueue_task_fair and dequeue_task_fair no longer iterate up
> the hierarchy all the time, a method to lazily propagate sum_exec_runtime
> up the hierarchy is necessary.
>
> Once a tick, propagate the newly accumulated exec_runtime up the hierarchy,
> and feed it into CFS bandwidth control.
>
> Remove the pointless call to account_cfs_rq_runtime from update_curr,
> which is always called with a root cfs_rq.

But what about the call to account_cfs_rq_runtime() in
set_curr_task_fair()? Here you always call it with the root cfs_rq.
Shouldn't it also be called in a loop over all se's until !se->parent
(like in propagate_exec_runtime() further below)?

> Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
> ---
> include/linux/sched.h | 1 +
> kernel/sched/core.c | 1 +
> kernel/sched/fair.c | 22 ++++++++++++++++++++--
> 3 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 901c710363e7..bdca15b3afe7 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -454,6 +454,7 @@ struct sched_entity {
> int depth;
> unsigned long enqueued_h_load;
> unsigned long enqueued_h_weight;
> + u64 propagated_exec_runtime;
> struct load_weight h_load;
> struct sched_entity *parent;
> /* rq on which this entity is (to be) queued: */
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fbd96900f715..9915d20e84a9 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2137,6 +2137,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
> INIT_LIST_HEAD(&p->se.group_node);
>
> #ifdef CONFIG_FAIR_GROUP_SCHED
> + p->se.propagated_exec_runtime = 0;
> p->se.cfs_rq = NULL;
> #endif
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 5cfa3dbeba49..d6c881c5c4d5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -898,8 +898,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
> trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
> cgroup_account_cputime(curtask, delta_exec);
> account_group_exec_runtime(curtask, delta_exec);
> -
> - account_cfs_rq_runtime(cfs_rq, delta_exec);
> }
>
> static void update_curr_fair(struct rq *rq)
> @@ -3412,6 +3410,20 @@ static inline bool skip_blocked_update(struct sched_entity *se)
> return true;
> }
>
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> + struct sched_entity *se)
> +{
> + struct sched_entity *parent = se->parent;
> + u64 diff = se->sum_exec_runtime - se->propagated_exec_runtime;
> +
> + if (parent) {
> + parent->sum_exec_runtime += diff;
> + account_cfs_rq_runtime(cfs_rq, diff);
> + }
> +
> + se->propagated_exec_runtime = se->sum_exec_runtime;
> +}
> +
> #else /* CONFIG_FAIR_GROUP_SCHED */
>
> static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
> @@ -3423,6 +3435,11 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
>
> static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) {}
>
> +static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
> + struct sched_entity *se);
> +{
> +}
> +
> #endif /* CONFIG_FAIR_GROUP_SCHED */
>
> /**
> @@ -10157,6 +10174,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se, int flags)
> if (!(flags & DO_ATTACH))
> break;
>
> + propagate_exec_runtime(cfs_rq, se);
> update_cfs_group(se);
> }
> }
>