Re: [RFC PATCH v2 5/5] sched/fair: Simplify ancestor enqueue loops

From: Vincent Guittot
Date: Thu Sep 09 2021 - 10:40:42 EST


On Thu, 19 Aug 2021 at 19:50, Michal Koutný <mkoutny@xxxxxxxx> wrote:
>
> When a task is enqueued or cfs_rq is unthrottled we have work to do from
> the cfs_rq in question possibly up to root. The important nodes on the
> path are throttled cfs_rqs or cfs_rqs already enqueued to their parent.
>
> Instead of multiple (interrupted) loops, do all the work in a single loop
> and decide inside it what needs to be done. This undoes parts of

These multiple loops with breaks were introduced so that unthrottle_cfs_rq,
throttle_cfs_rq, enqueue_task_fair and dequeue_task_fair all follow the
same pattern, and I don't see any good reason to break this.

> commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for
> leaf_cfs_rq list") but it should not bring any functional changes.
>
> Note some PELT stats update code is duplicated both in enqueue_entity
> and the ancestor loop (update_load_avg, se_update_runnable,
> update_cfs_group). It'd be nice to factor these out, however, the later
> parts of enqueue_entity rely on the updates, so stick with the current
> repetition.
>
> Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
> ---
> kernel/sched/fair.c | 57 +++++++++++++++++----------------------------
> 1 file changed, 21 insertions(+), 36 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 9978485334ec..79f183336fa8 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4883,6 +4883,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
> struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
> struct sched_entity *se;
> long task_delta, idle_task_delta;
> + int enqueue = 1;
>
> cfs_rq->throttled = 0;
>
> @@ -4911,29 +4912,21 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
> task_delta = cfs_rq->h_nr_running;
> idle_task_delta = cfs_rq->idle_h_nr_running;
> for_each_sched_entity(se) {
> - if (se->on_rq)
> - break;
> cfs_rq = cfs_rq_of(se);
> - enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
> -
> - cfs_rq->h_nr_running += task_delta;
> - cfs_rq->idle_h_nr_running += idle_task_delta;
>
> - /* end evaluation on encountering a throttled cfs_rq */
> - if (cfs_rq_throttled(cfs_rq))
> - goto unthrottle_throttle;
> - }
> -
> - for_each_sched_entity(se) {
> - cfs_rq = cfs_rq_of(se);
> -
> - update_load_avg(cfs_rq, se, UPDATE_TG);
> - se_update_runnable(se);
> + if (se->on_rq)
> + enqueue = 0;
> + if (enqueue)
> + enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
> + else {
> + update_load_avg(cfs_rq, se, UPDATE_TG);
> + se_update_runnable(se);
> + /* XXX: no update_cfs_group(se); */
> + }
>
> cfs_rq->h_nr_running += task_delta;
> cfs_rq->idle_h_nr_running += idle_task_delta;
>
> -
> /* end evaluation on encountering a throttled cfs_rq */
> if (cfs_rq_throttled(cfs_rq))
> goto unthrottle_throttle;
> @@ -5537,6 +5530,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> struct sched_entity *se = &p->se;
> int idle_h_nr_running = task_has_idle_policy(p);
> int task_new = !(flags & ENQUEUE_WAKEUP);
> + int enqueue = 1;
>
> /*
> * The code below (indirectly) updates schedutil which looks at
> @@ -5555,27 +5549,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
>
> for_each_sched_entity(se) {
> - if (se->on_rq)
> - break;
> cfs_rq = cfs_rq_of(se);
> - enqueue_entity(cfs_rq, se, flags);
>
> - cfs_rq->h_nr_running++;
> - cfs_rq->idle_h_nr_running += idle_h_nr_running;
> -
> - /* end evaluation on encountering a throttled cfs_rq */
> - if (cfs_rq_throttled(cfs_rq))
> - goto enqueue_throttle;
> -
> - flags = ENQUEUE_WAKEUP;
> - }
> -
> - for_each_sched_entity(se) {
> - cfs_rq = cfs_rq_of(se);
> -
> - update_load_avg(cfs_rq, se, UPDATE_TG);
> - se_update_runnable(se);
> - update_cfs_group(se);
> + if (se->on_rq)
> + enqueue = 0;
> + if (enqueue) {
> + enqueue_entity(cfs_rq, se, flags);
> + flags = ENQUEUE_WAKEUP;
> + } else {
> + update_load_avg(cfs_rq, se, UPDATE_TG);
> + se_update_runnable(se);
> + update_cfs_group(se);
> + }
>
> cfs_rq->h_nr_running++;
> cfs_rq->idle_h_nr_running += idle_h_nr_running;
> --
> 2.32.0
>