[RFC][PATCH 05/10] sched/fair: Unify pick_{,next_}_task_fair()
From: Peter Zijlstra
Date: Fri Apr 05 2024 - 07:04:44 EST
Implement pick_next_task_fair() in terms of pick_task_fair() to
de-duplicate the pick loop.
More importantly, this makes all the pick loops use the
state-invariant form, which is useful to introduce further re-try
conditions in later patches.
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 60 ++++++----------------------------------------------
1 file changed, 8 insertions(+), 52 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8415,7 +8415,6 @@ static void check_preempt_wakeup_fair(st
resched_curr(rq);
}
-#ifdef CONFIG_SMP
static struct task_struct *pick_task_fair(struct rq *rq)
{
struct sched_entity *se;
@@ -8427,7 +8426,7 @@ static struct task_struct *pick_task_fai
return NULL;
do {
- /* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+ /* Might not have done put_prev_entity() */
if (cfs_rq->curr && cfs_rq->curr->on_rq)
update_curr(cfs_rq);
@@ -8440,19 +8439,20 @@ static struct task_struct *pick_task_fai
return task_of(se);
}
-#endif
struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
- struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
struct task_struct *p;
+ struct cfs_rq *cfs_rq;
int new_tasks;
again:
- if (!sched_fair_runnable(rq))
+ p = pick_task_fair(rq);
+ if (!p)
goto idle;
+ se = &p->se;
#ifdef CONFIG_FAIR_GROUP_SCHED
if (!prev || prev->sched_class != &fair_sched_class)
@@ -8464,46 +8464,7 @@ pick_next_task_fair(struct rq *rq, struc
*
* Therefore attempt to avoid putting and setting the entire cgroup
* hierarchy, only change the part that actually changes.
- */
-
- do {
- struct sched_entity *curr = cfs_rq->curr;
-
- /*
- * Since we got here without doing put_prev_entity() we also
- * have to consider cfs_rq->curr. If it is still a runnable
- * entity, update_curr() will update its vruntime, otherwise
- * forget we've ever seen it.
- */
- if (curr) {
- if (curr->on_rq)
- update_curr(cfs_rq);
- else
- curr = NULL;
-
- /*
- * This call to check_cfs_rq_runtime() will do the
- * throttle and dequeue its entity in the parent(s).
- * Therefore the nr_running test will indeed
- * be correct.
- */
- if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
- cfs_rq = &rq->cfs;
-
- if (!cfs_rq->nr_running)
- goto idle;
-
- goto simple;
- }
- }
-
- se = pick_next_entity(cfs_rq);
- cfs_rq = group_cfs_rq(se);
- } while (cfs_rq);
-
- p = task_of(se);
-
- /*
+ *
* Since we haven't yet done put_prev_entity and if the selected task
* is a different task than we started out with, try and touch the
* least amount of cfs_rqs.
@@ -8535,13 +8496,8 @@ pick_next_task_fair(struct rq *rq, struc
if (prev)
put_prev_task(rq, prev);
- do {
- se = pick_next_entity(cfs_rq);
- set_next_entity(cfs_rq, se);
- cfs_rq = group_cfs_rq(se);
- } while (cfs_rq);
-
- p = task_of(se);
+ for_each_sched_entity(se)
+ set_next_entity(cfs_rq_of(se), se);
done: __maybe_unused;
#ifdef CONFIG_SMP