[PATCH v2] sched/fair: delayed dequeued entity bias wakeup preempt short
From: Vincent Guittot
Date: Wed Apr 22 2026 - 05:36:56 EST
When checking if a shorter slice entity can preempt curr, we compare it with
the next entity to be picked but delayed dequeue entities can screw the
decision whereas they will be dequeued when picking next entity.
Dequeue them while checking for preemption as they will be dequeued anyway
when picking next entity.
tip/sched/core tip/sched/core +this patch
cyclictest slice (ms) (default)2.8 8 8
hackbench slice (ms) (default)2.8 20 20
Total Samples | 22559 22595 22683
Average (us) | 157 64( 59%) 59( 8%)
Median (P50) (us) | 57 57( 0%) 58(- 2%)
90th Percentile (us) | 64 60( 6%) 60( 0%)
99th Percentile (us) | 2407 67( 97%) 67( 0%)
99.9th Percentile (us) | 3400 2288( 33%) 727( 68%)
Maximum (us) | 5037 9252(-84%) 7461( 19%)
Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
v2 changes:
- use pick_next_entity() instead of duplicating the code in
wakeup_preempt_fair()
kernel/sched/fair.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f179faf7a6a1..d99e56b6dcc9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1099,7 +1099,7 @@ static inline void cancel_protect_slice(struct sched_entity *se)
*
* Which allows tree pruning through eligibility.
*/
-static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
struct sched_entity *se = __pick_first_entity(cfs_rq);
@@ -1170,11 +1170,6 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
return best;
}
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
-{
- return __pick_eevdf(cfs_rq, true);
-}
-
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
@@ -5749,11 +5744,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
* 4) do not run the "skip" process, if something else is available
*/
static struct sched_entity *
-pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
+pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq, bool protect)
{
struct sched_entity *se;
- se = pick_eevdf(cfs_rq);
+ se = pick_eevdf(cfs_rq, protect);
if (se->sched_delayed) {
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
/*
@@ -9015,7 +9010,7 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
{
enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK;
struct task_struct *donor = rq->donor;
- struct sched_entity *se = &donor->se, *pse = &p->se;
+ struct sched_entity *nse, *se = &donor->se, *pse = &p->se;
struct cfs_rq *cfs_rq = task_cfs_rq(donor);
int cse_is_idle, pse_is_idle;
@@ -9126,11 +9121,17 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
}
pick:
+ nse = pick_next_entity(rq, cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT);
+ /* If @p has become the most eligible task, force preemption */
+ if (nse == pse)
+ goto preempt;
+
/*
- * If @p has become the most eligible task, force preemption.
+ * Because p is enqueued, nse being null can only mean that we
+ * dequeued a delayed task.
*/
- if (__pick_eevdf(cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT) == pse)
- goto preempt;
+ if (!nse)
+ goto pick;
if (sched_feat(RUN_TO_PARITY))
update_protect_slice(cfs_rq, se);
@@ -9167,7 +9168,7 @@ static struct task_struct *pick_task_fair(struct rq *rq, struct rq_flags *rf)
throttled |= check_cfs_rq_runtime(cfs_rq);
- se = pick_next_entity(rq, cfs_rq);
+ se = pick_next_entity(rq, cfs_rq, true);
if (!se)
goto again;
cfs_rq = group_cfs_rq(se);
--
2.43.0