[PATCH v4 1/3] sched_ext: Fix pnt_seq calculation when picking the next task
From: liuwenfang
Date: Tue Aug 19 2025 - 02:52:49 EST
Currently, rq->scx.pnt_seq is incremented only when the target CPU
switches from an SCX task to a non-SCX task. As a result, in scx_pair
the paired CPU may not exit its busy-wait state when it should.
rq->scx.pnt_seq was introduced to improve the exclusion guarantees
needed by scx_pair. The invoking CPU calls scx_bpf_kick_cpu() with
SCX_KICK_WAIT and enters a busy-wait state. It should exit this
state once the target CPU has entered the rescheduling path and
incremented rq->scx.pnt_seq.
Therefore, move the pnt_seq increment into a new helper,
__put_prev_set_next_scx(), which is called from
put_prev_set_next_task() and pick_next_task_fair(). pnt_seq is now
incremented for any task switch on the target CPU, so the invoking
CPU can exit the busy-wait state properly.
Signed-off-by: Wenfang Liu <liuwenfang@xxxxxxxxx>
---
kernel/sched/ext.c | 10 +---------
kernel/sched/fair.c | 2 +-
kernel/sched/sched.h | 30 +++++++++++++++++++++++++++++-
3 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f5133249f..ba99739d7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3191,14 +3191,6 @@ static void switch_class(struct rq *rq, struct task_struct *next)
{
const struct sched_class *next_class = next->sched_class;
-#ifdef CONFIG_SMP
- /*
- * Pairs with the smp_load_acquire() issued by a CPU in
- * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
- * resched.
- */
- smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif
if (!static_branch_unlikely(&scx_ops_cpu_preempt))
return;
@@ -5966,7 +5958,7 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
if (cpu != cpu_of(this_rq)) {
/*
* Pairs with smp_store_release() issued by this CPU in
- * switch_class() on the resched path.
+ * __put_prev_set_next_scx() on the resched path.
*
* We busy-wait here to guarantee that no other task can
* be scheduled on our core before the target CPU has
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995..21214b3fa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8885,7 +8885,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
if (prev->sched_class != &fair_sched_class)
goto simple;
- __put_prev_set_next_dl_server(rq, prev, p);
+ __put_prev_set_next(rq, prev, p);
/*
* Because of the set_next_buddy() in dequeue_task_fair() it is rather
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34e..435de61c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,12 +1738,32 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
}
+static inline void __put_prev_set_next_scx(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (!scx_enabled())
+ return;
+
+#ifdef CONFIG_SMP
+ /*
+ * Pairs with the smp_load_acquire() issued by a CPU in
+ * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
+ * resched.
+ */
+ smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
#else /* !CONFIG_SCHED_CLASS_EXT */
#define scx_enabled() false
#define scx_switched_all() false
static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+static inline void __put_prev_set_next_scx(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next) {}
#endif /* !CONFIG_SCHED_CLASS_EXT */
/*
@@ -2457,13 +2477,21 @@ __put_prev_set_next_dl_server(struct rq *rq,
rq->dl_server = NULL;
}
+static inline void __put_prev_set_next(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+{
+ __put_prev_set_next_dl_server(rq, prev, next);
+ __put_prev_set_next_scx(rq, prev, next);
+}
+
static inline void put_prev_set_next_task(struct rq *rq,
struct task_struct *prev,
struct task_struct *next)
{
WARN_ON_ONCE(rq->curr != prev);
- __put_prev_set_next_dl_server(rq, prev, next);
+ __put_prev_set_next(rq, prev, next);
if (next == prev)
return;
--
2.17.1