[RFC][PATCH 1/3] sched: Detect per-class runqueue changes
From: Peter Zijlstra
Date: Mon Oct 06 2025 - 06:57:09 EST
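Have enqueue/dequeue set a per-class bit in rq->queue_mask. This then
enables easy tracking of which per-class runqueues were modified across
a lock-break. As a first user, sched_balance_newidle() clears the mask
before dropping the rq lock and, once the lock is re-acquired, restarts
the pick if any bit above the fair class's bit has been set.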
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 2 ++
kernel/sched/deadline.c | 2 ++
kernel/sched/ext.c | 2 ++
kernel/sched/fair.c | 7 +++++--
kernel/sched/idle.c | 2 ++
kernel/sched/rt.c | 2 ++
kernel/sched/sched.h | 10 ++++++++++
kernel/sched/stop_task.c | 2 ++
8 files changed, 27 insertions(+), 2 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2089,6 +2089,7 @@ void enqueue_task(struct rq *rq, struct
*/
uclamp_rq_inc(rq, p, flags);
+ rq->queue_mask |= p->sched_class->queue_mask;
p->sched_class->enqueue_task(rq, p, flags);
psi_enqueue(p, flags);
@@ -2121,6 +2122,7 @@ inline bool dequeue_task(struct rq *rq,
* and mark the task ->sched_delayed.
*/
uclamp_rq_dec(rq, p);
+ rq->queue_mask |= p->sched_class->queue_mask;
return p->sched_class->dequeue_task(rq, p, flags);
}
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3094,6 +3094,8 @@ static int task_is_throttled_dl(struct t
DEFINE_SCHED_CLASS(dl) = {
+ .queue_mask = 8,
+
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3234,6 +3234,8 @@ static void scx_cgroup_unlock(void) {}
* their current sched_class. Call them directly from sched core instead.
*/
DEFINE_SCHED_CLASS(ext) = {
+ .queue_mask = 1,
+
.enqueue_task = enqueue_task_scx,
.dequeue_task = dequeue_task_scx,
.yield_task = yield_task_scx,
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12830,6 +12830,7 @@ static int sched_balance_newidle(struct
}
rcu_read_unlock();
+ this_rq->queue_mask = 0;
raw_spin_rq_unlock(this_rq);
t0 = sched_clock_cpu(this_cpu);
@@ -12887,8 +12888,8 @@ static int sched_balance_newidle(struct
if (this_rq->cfs.h_nr_queued && !pulled_task)
pulled_task = 1;
- /* Is there a task of a high priority class? */
- if (this_rq->nr_running != this_rq->cfs.h_nr_queued)
+ /* If a higher prio class was modified, restart the pick */
+ if (this_rq->queue_mask & ~((fair_sched_class.queue_mask << 1)-1))
pulled_task = -1;
out:
@@ -13623,6 +13624,8 @@ static unsigned int get_rr_interval_fair
*/
DEFINE_SCHED_CLASS(fair) = {
+ .queue_mask = 2,
+
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -522,6 +522,8 @@ static void update_curr_idle(struct rq *
*/
DEFINE_SCHED_CLASS(idle) = {
+ .queue_mask = 0,
+
/* no enqueue/yield_task for idle tasks */
/* dequeue is not valid, we print a debug message there: */
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2571,6 +2571,8 @@ static int task_is_throttled_rt(struct t
DEFINE_SCHED_CLASS(rt) = {
+ .queue_mask = 4,
+
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1118,6 +1118,7 @@ struct rq {
/* runqueue lock: */
raw_spinlock_t __lock;
+ unsigned int queue_mask;
unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
@@ -2414,6 +2415,15 @@ struct sched_class {
#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
#endif
+ /*
+ * idle: 0
+ * ext: 1
+ * fair: 2
+ * rt: 4
+ * dl: 8
+ * stop: 16
+ */
+ unsigned int queue_mask;
/*
* move_queued_task/activate_task/enqueue_task: rq->lock
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -99,6 +99,8 @@ static void update_curr_stop(struct rq *
*/
DEFINE_SCHED_CLASS(stop) = {
+ .queue_mask = 16,
+
.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
.yield_task = yield_task_stop,