[PATCH 05/10] sched_ext: Fix TOCTOU race in consume_remote_task()

From: Andrea Righi

Date: Wed May 06 2026 - 13:47:58 EST


When pulling a task from a non-local DSQ, consume_dispatch_q() checks if
the task can run on the destination rq via task_can_run_on_remote_rq().
However, it then drops the destination rq lock and locks the source rq
in consume_remote_task() -> unlink_dsq_and_lock_src_rq(). During this
window, the task may become migration-disabled, making it invalid to
migrate it to the destination rq.

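Fix this by re-evaluating task_can_run_on_remote_rq() in
consume_remote_task() after the source rq is locked. If the task can no
longer be migrated, clear its DSQ association, reset the holding CPU,
and enqueue it on the source rq's local DSQ instead, rescheduling the
source rq if the task's sched class is above that of the rq's donor.
Then drop the source rq lock, re-acquire the destination rq lock, and
return false so that consume_dispatch_q() retries.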

Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
---
kernel/sched/ext.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index d64b1283fa851..a70f8693b906f 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2418,13 +2418,24 @@ static bool unlink_dsq_and_lock_src_rq(struct task_struct *p,
!WARN_ON_ONCE(src_rq != task_rq(p));
}

-static bool consume_remote_task(struct rq *this_rq,
+static bool consume_remote_task(struct scx_sched *sch, struct rq *this_rq,
struct task_struct *p, u64 enq_flags,
struct scx_dispatch_q *dsq, struct rq *src_rq)
{
raw_spin_rq_unlock(this_rq);

if (unlink_dsq_and_lock_src_rq(p, dsq, src_rq)) {
+ if (unlikely(!task_can_run_on_remote_rq(sch, p, this_rq, true))) {
+ p->scx.dsq = NULL;
+ p->scx.holding_cpu = -1;
+ dispatch_enqueue(sch, src_rq, &src_rq->scx.local_dsq, p,
+ enq_flags | SCX_ENQ_CLEAR_OPSS);
+ if (sched_class_above(p->sched_class, src_rq->donor->sched_class))
+ resched_curr(src_rq);
+ raw_spin_rq_unlock(src_rq);
+ raw_spin_rq_lock(this_rq);
+ return false;
+ }
move_remote_task_to_local_dsq(p, enq_flags, src_rq, this_rq);
return true;
} else {
@@ -2541,7 +2552,7 @@ static bool consume_dispatch_q(struct scx_sched *sch, struct rq *rq,
}

if (task_can_run_on_remote_rq(sch, p, rq, false)) {
- if (likely(consume_remote_task(rq, p, enq_flags, dsq, task_rq)))
+ if (likely(consume_remote_task(sch, rq, p, enq_flags, dsq, task_rq)))
return true;
goto retry;
}
--
2.54.0