[PATCH 13/24] sched/fair: Prepare pick_next_task() for delayed dequeue

From: Peter Zijlstra
Date: Sat Jul 27 2024 - 07:06:22 EST


Delayed dequeue's natural end is when it gets picked again. Ensure
pick_next_task() knows what to do with delayed tasks.

Note, this relies on the earlier patch that made pick_next_task()
state invariant -- it will restart the pick on dequeue, because
obviously the just dequeued task is no longer eligible.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5453,6 +5453,8 @@ set_next_entity(struct cfs_rq *cfs_rq, s
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}

+static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
+
/*
* Pick the next process, keeping these things in mind, in this order:
* 1) keep things fair between processes/task groups
@@ -5461,16 +5463,27 @@ set_next_entity(struct cfs_rq *cfs_rq, s
* 4) do not run the "skip" process, if something else is available
*/
static struct sched_entity *
-pick_next_entity(struct cfs_rq *cfs_rq)
+pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
{
/*
* Enabling NEXT_BUDDY will affect latency but not fairness.
*/
if (sched_feat(NEXT_BUDDY) &&
- cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+ cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next)) {
+ /* ->next will never be delayed */
+ SCHED_WARN_ON(cfs_rq->next->sched_delayed);
return cfs_rq->next;
+ }
+
+ struct sched_entity *se = pick_eevdf(cfs_rq);
+ if (se->sched_delayed) {
+ dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+ SCHED_WARN_ON(se->sched_delayed);
+ SCHED_WARN_ON(se->on_rq);

- return pick_eevdf(cfs_rq);
+ return NULL;
+ }
+ return se;
}

static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -8478,7 +8491,9 @@ static struct task_struct *pick_task_fai
if (unlikely(check_cfs_rq_runtime(cfs_rq)))
goto again;

- se = pick_next_entity(cfs_rq);
+ se = pick_next_entity(rq, cfs_rq);
+ if (!se)
+ goto again;
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);