[RFC PATCH v2 5/5] sched/fair: Simplify ancestor enqueue loops

From: Michal Koutný
Date: Thu Aug 19 2021 - 13:51:07 EST


When a task is enqueued or a cfs_rq is unthrottled, we have work to do from
the cfs_rq in question possibly up to the root. The important nodes on the
path are throttled cfs_rqs or cfs_rqs already enqueued to their parent.

Instead of multiple (interrupted) loops, do all the work in a single loop
and decide inside it what needs to be done. This undoes parts of
commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for
leaf_cfs_rq list") but it should not bring any functional changes.

Note that some PELT stats update code is duplicated in both enqueue_entity
and the ancestor loop (update_load_avg, se_update_runnable,
update_cfs_group). It would be nice to factor these out; however, the later
parts of enqueue_entity rely on the updates, so stick with the current
repetition.

Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
kernel/sched/fair.c | 57 +++++++++++++++++----------------------------
1 file changed, 21 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9978485334ec..79f183336fa8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4883,6 +4883,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long task_delta, idle_task_delta;
+ int enqueue = 1;

cfs_rq->throttled = 0;

@@ -4911,29 +4912,21 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
task_delta = cfs_rq->h_nr_running;
idle_task_delta = cfs_rq->idle_h_nr_running;
for_each_sched_entity(se) {
- if (se->on_rq)
- break;
cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
-
- cfs_rq->h_nr_running += task_delta;
- cfs_rq->idle_h_nr_running += idle_task_delta;

- /* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
- goto unthrottle_throttle;
- }
-
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
-
- update_load_avg(cfs_rq, se, UPDATE_TG);
- se_update_runnable(se);
+ if (se->on_rq)
+ enqueue = 0;
+ if (enqueue)
+ enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+ else {
+ update_load_avg(cfs_rq, se, UPDATE_TG);
+ se_update_runnable(se);
+ /* XXX: no update_cfs_group(se); */
+ }

cfs_rq->h_nr_running += task_delta;
cfs_rq->idle_h_nr_running += idle_task_delta;

-
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto unthrottle_throttle;
@@ -5537,6 +5530,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
int idle_h_nr_running = task_has_idle_policy(p);
int task_new = !(flags & ENQUEUE_WAKEUP);
+ int enqueue = 1;

/*
* The code below (indirectly) updates schedutil which looks at
@@ -5555,27 +5549,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);

for_each_sched_entity(se) {
- if (se->on_rq)
- break;
cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, flags);

- cfs_rq->h_nr_running++;
- cfs_rq->idle_h_nr_running += idle_h_nr_running;
-
- /* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
- goto enqueue_throttle;
-
- flags = ENQUEUE_WAKEUP;
- }
-
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
-
- update_load_avg(cfs_rq, se, UPDATE_TG);
- se_update_runnable(se);
- update_cfs_group(se);
+ if (se->on_rq)
+ enqueue = 0;
+ if (enqueue) {
+ enqueue_entity(cfs_rq, se, flags);
+ flags = ENQUEUE_WAKEUP;
+ } else {
+ update_load_avg(cfs_rq, se, UPDATE_TG);
+ se_update_runnable(se);
+ update_cfs_group(se);
+ }

cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
--
2.32.0