[PATCH] sched_ext: Refresh idle state when kicking CPUs

From: Andrea Righi
Date: Wed Jan 01 2025 - 13:25:11 EST


Selecting an idle CPU and marking it as busy without directly
dispatching a task can lead to scheduling inefficiencies, as the CPU
remains incorrectly marked as busy (even after it returns to an idle
state), making it ineligible for selection in ops.select_cpu() and
similar operations.
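
For example, the pattern below can trigger the issue (a minimal
illustrative sketch, not code from [1]: sketch_select_cpu() and its
policy are made up, while scx_bpf_test_and_clear_cpu_idle() and
scx_bpf_kick_cpu() are the existing kfuncs):

s32 BPF_STRUCT_OPS(sketch_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	/*
	 * Claim prev_cpu if it is idle: this clears its idle bit, so
	 * later idle-CPU selections will skip it.
	 */
	if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
		/*
		 * Wake the CPU without dispatching the task to its
		 * local DSQ (ops.enqueue() will route it later). If
		 * prev_cpu then finds nothing to run and re-enters
		 * idle, nothing sets its idle bit again.
		 */
		scx_bpf_kick_cpu(prev_cpu, SCX_KICK_IDLE);
	}

	return prev_cpu;
}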

This results in suboptimal core utilization; see [1] for an example.
The issue was introduced by the pick_next_task() refactoring, where the
in-kernel CPU idle state is now updated only during transitions between
non-idle and idle tasks. Previously, it was refreshed during every idle
task cycle as part of the put_prev_task() call.
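
Concretely, the regression can unfold like this (a hypothetical
sequence on a single CPU):

 1. CPU0 sits in the idle task; ops.select_cpu() picks it for a
    waking task and clears its idle bit.
 2. CPU0 is kicked and wakes up, but the task ends up dispatched
    elsewhere, so CPU0's local DSQ is empty.
 3. CPU0 goes straight back to the idle task. Since no non-idle task
    ever ran, there is no non-idle/idle transition to refresh the
    idle state, and CPU0 stays marked as busy indefinitely.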

To address this, update the idle state when a CPU is kicked from idle,
provided no task is queued to its local DSQ. This ensures the CPU is
correctly marked as idle when it is not running any task, avoiding
scheduling bubbles and maintaining efficient core utilization.

[1] https://github.com/sched-ext/scx/pull/1139

Fixes: 7c65ae81ea86 ("sched_ext: Don't call put_prev_task_scx() before picking the next task")
Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
---
kernel/sched/ext.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 926579624c41..bdee66e7b353 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -6079,6 +6079,19 @@ static bool can_skip_idle_kick(struct rq *rq)
 	return !is_idle_task(rq->curr) && !(rq->scx.flags & SCX_RQ_IN_BALANCE);
 }
 
+static void refresh_idle_state_on_kick(struct rq *rq)
+{
+	lockdep_assert_rq_held(rq);
+
+	/*
+	 * If the CPU is idle and the local DSQ has no queued tasks, update
+	 * its idle state to prevent the CPU from staying busy even if it
+	 * goes back to idle without executing any task.
+	 */
+	if (is_idle_task(rq->curr) && !rq->scx.local_dsq.nr)
+		__scx_update_idle(rq, true);
+}
+
 static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *pseqs)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -6104,6 +6117,7 @@ static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *pseqs)
 			should_wait = true;
 		}
 
+		refresh_idle_state_on_kick(rq);
 		resched_curr(rq);
 	} else {
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
@@ -6123,8 +6137,10 @@ static void kick_one_cpu_if_idle(s32 cpu, struct rq *this_rq)
 	raw_spin_rq_lock_irqsave(rq, flags);
 
 	if (!can_skip_idle_kick(rq) &&
-	    (cpu_online(cpu) || cpu == cpu_of(this_rq)))
+	    (cpu_online(cpu) || cpu == cpu_of(this_rq))) {
+		refresh_idle_state_on_kick(rq);
 		resched_curr(rq);
+	}
 
 	raw_spin_rq_unlock_irqrestore(rq, flags);
 }
--
2.47.1