Re: [PATCH 01/12] sched: Employ sched_change guards

From: Andrea Righi

Date: Tue Oct 07 2025 - 04:21:07 EST


Hi Peter,

On Mon, Oct 06, 2025 at 12:44:03PM +0200, Peter Zijlstra wrote:
> As proposed a long while ago -- and half done by scx -- wrap the
> scheduler's 'change' pattern in a guard helper.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Acked-by: Tejun Heo <tj@xxxxxxxxxx>
> ---
...
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3885,23 +3885,22 @@ extern void check_class_changed(struct r
> extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
> extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
>
> -#ifdef CONFIG_SCHED_CLASS_EXT
> -/*
> - * Used by SCX in the enable/disable paths to move tasks between sched_classes
> - * and establish invariants.
> - */
> -struct sched_enq_and_set_ctx {

Not necessarily for this patch (we can add it later), but I kinda liked the
comment that briefly explained how the context is used. Maybe something
along these lines would be helpful?

/*
 * Used to ensure the correct sequence of task state transitions, such as
 * switching between sched_classes, changing CPU affinity or priority, or
 * updating the queued/running state.
 */

> +struct sched_change_ctx {
> struct task_struct *p;
> - int queue_flags;
> + int flags;
> bool queued;
> bool running;
> };
>
> -void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
> - struct sched_enq_and_set_ctx *ctx);
> -void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
> +struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags);
> +void sched_change_end(struct sched_change_ctx *ctx);
>
> -#endif /* CONFIG_SCHED_CLASS_EXT */
> +DEFINE_CLASS(sched_change, struct sched_change_ctx *,
> + sched_change_end(_T),
> + sched_change_begin(p, flags),
> + struct task_struct *p, unsigned int flags)
> +
> +DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
>
> #include "ext.h"
>
> --- a/kernel/sched/syscalls.c
> +++ b/kernel/sched/syscalls.c
> @@ -64,7 +64,6 @@ static int effective_prio(struct task_st
>
> void set_user_nice(struct task_struct *p, long nice)
> {
> - bool queued, running;
> struct rq *rq;
> int old_prio;
>
> @@ -90,22 +89,12 @@ void set_user_nice(struct task_struct *p
> return;
> }
>
> - queued = task_on_rq_queued(p);
> - running = task_current_donor(rq, p);
> - if (queued)
> - dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
> - if (running)
> - put_prev_task(rq, p);
> -
> - p->static_prio = NICE_TO_PRIO(nice);
> - set_load_weight(p, true);
> - old_prio = p->prio;
> - p->prio = effective_prio(p);
> -
> - if (queued)
> - enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
> - if (running)
> - set_next_task(rq, p);
> + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
> + p->static_prio = NICE_TO_PRIO(nice);
> + set_load_weight(p, true);
> + old_prio = p->prio;
> + p->prio = effective_prio(p);
> + }
>
> /*
> * If the task increased its priority or is running and
> @@ -515,7 +504,7 @@ int __sched_setscheduler(struct task_str
> bool user, bool pi)
> {
> int oldpolicy = -1, policy = attr->sched_policy;
> - int retval, oldprio, newprio, queued, running;
> + int retval, oldprio, newprio;
> const struct sched_class *prev_class, *next_class;
> struct balance_callback *head;
> struct rq_flags rf;
> @@ -698,33 +687,25 @@ int __sched_setscheduler(struct task_str
> if (prev_class != next_class && p->se.sched_delayed)
> dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
>
> - queued = task_on_rq_queued(p);
> - running = task_current_donor(rq, p);
> - if (queued)
> - dequeue_task(rq, p, queue_flags);
> - if (running)
> - put_prev_task(rq, p);
> -
> - if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
> - __setscheduler_params(p, attr);
> - p->sched_class = next_class;
> - p->prio = newprio;
> - }
> - __setscheduler_uclamp(p, attr);
> - check_class_changing(rq, p, prev_class);
> + scoped_guard (sched_change, p, queue_flags) {
>
> - if (queued) {
> - /*
> - * We enqueue to tail when the priority of a task is
> - * increased (user space view).
> - */
> - if (oldprio < p->prio)
> - queue_flags |= ENQUEUE_HEAD;
> + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
> + __setscheduler_params(p, attr);
> + p->sched_class = next_class;
> + p->prio = newprio;
> + }
> + __setscheduler_uclamp(p, attr);
> + check_class_changing(rq, p, prev_class);
>
> - enqueue_task(rq, p, queue_flags);
> + if (scope->queued) {
> + /*
> + * We enqueue to tail when the priority of a task is
> + * increased (user space view).
> + */
> + if (oldprio < p->prio)
> + scope->flags |= ENQUEUE_HEAD;
> + }
> }
> - if (running)
> - set_next_task(rq, p);
>
> check_class_changed(rq, p, prev_class, oldprio);
>
>
>

Thanks,
-Andrea