[PATCH v4 3/3] sched_ext: Fix cpu_released while changing sched policy of the running task
From: liuwenfang
Date: Tue Aug 19 2025 - 03:07:16 EST
Priority inheritance needs to be taken into account: e.g., an SCX task can
be boosted to a real-time class while holding a mutex lock, and then
restored to SCX when it releases the lock. Fix up the value of cpu_released
when the sched class of the currently running task is changed.
Signed-off-by: Wenfang Liu <liuwenfang@xxxxxxxxx>
---
kernel/sched/ext.c | 44 ++++++++++++++++++++++++++++++--------------
kernel/sched/sched.h | 4 ++--
2 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 98a05025b..bf4512908 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2959,6 +2959,8 @@ static void flush_dispatch_buf(struct rq *rq)
dspc->cursor = 0;
}
+static void scx_maybe_cpu_acquire(struct rq *rq);
+
static int balance_one(struct rq *rq, struct task_struct *prev)
{
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
@@ -2970,18 +2972,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
rq->scx.flags |= SCX_RQ_IN_BALANCE;
rq->scx.flags &= ~(SCX_RQ_BAL_PENDING | SCX_RQ_BAL_KEEP);
- if (static_branch_unlikely(&scx_ops_cpu_preempt) &&
- unlikely(rq->scx.cpu_released)) {
- /*
- * If the previous sched_class for the current CPU was not SCX,
- * notify the BPF scheduler that it again has control of the
- * core. This callback complements ->cpu_release(), which is
- * emitted in switch_class().
- */
- if (SCX_HAS_OP(cpu_acquire))
- SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL);
- rq->scx.cpu_released = false;
- }
+ scx_maybe_cpu_acquire(rq);
if (prev_on_scx) {
update_curr_scx(rq);
@@ -3187,7 +3178,23 @@ preempt_reason_from_class(const struct sched_class *class)
return SCX_CPU_PREEMPT_UNKNOWN;
}
-void switch_class(struct rq *rq, struct task_struct *next)
+static void scx_maybe_cpu_acquire(struct rq *rq)
+{
+ if (static_branch_unlikely(&scx_ops_cpu_preempt) &&
+ unlikely(rq->scx.cpu_released)) {
+ /*
+ * If the previous sched_class for the current CPU was not SCX,
+ * notify the BPF scheduler that it again has control of the
+ * core. This callback complements ->cpu_release(), which is
+ * emitted in scx_maybe_cpu_release().
+ */
+ if (SCX_HAS_OP(cpu_acquire))
+ SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL);
+ rq->scx.cpu_released = false;
+ }
+}
+
+void scx_maybe_cpu_release(struct rq *rq, struct task_struct *next)
{
const struct sched_class *next_class = next->sched_class;
@@ -3881,11 +3888,20 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
static void switched_from_scx(struct rq *rq, struct task_struct *p)
{
+ if (task_current(rq, p))
+ scx_maybe_cpu_release(rq, p);
+
scx_ops_disable_task(p);
}
static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}
-static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
+static void switched_to_scx(struct rq *rq, struct task_struct *p)
+{
+ lockdep_assert_rq_held(rq);
+
+ if (task_current(rq, p))
+ scx_maybe_cpu_acquire(rq);
+}
int scx_check_setscheduler(struct task_struct *p, int policy)
{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e46becfed..ee0f35d47 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,7 +1738,7 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
}
-extern void switch_class(struct rq *rq, struct task_struct *next);
+extern void scx_maybe_cpu_release(struct rq *rq, struct task_struct *next);
static inline void __put_prev_set_next_scx(struct rq *rq,
struct task_struct *prev,
@@ -1759,7 +1759,7 @@ static inline void __put_prev_set_next_scx(struct rq *rq,
if (next->sched_class == &ext_sched_class)
return;
- switch_class(rq, next);
+ scx_maybe_cpu_release(rq, next);
}
#else /* !CONFIG_SCHED_CLASS_EXT */
--
2.17.1