Re: [PATCH 3/7] sched/debug: Stop and start server based on if it was active
From: Andrea Righi
Date: Mon Feb 02 2026 - 17:56:39 EST
On Mon, Feb 02, 2026 at 10:17:23PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 02, 2026 at 10:13:26PM +0100, Peter Zijlstra wrote:
> > On Mon, Jan 26, 2026 at 10:59:01AM +0100, Andrea Righi wrote:
> > > From: Joel Fernandes <joelagnelf@xxxxxxxxxx>
> > >
> > > Currently the DL server interface for applying parameters checks
> > > CFS-internals to identify if the server is active. This is error-prone
> > > and makes it difficult when adding new servers in the future.
> > >
> > > Fix it, by using dl_server_active() which is also used by the DL server
> > > code to determine if the DL server was started.
> > >
> > > Tested-by: Christian Loehle <christian.loehle@xxxxxxx>
> > > Acked-by: Tejun Heo <tj@xxxxxxxxxx>
> > > Reviewed-by: Juri Lelli <juri.lelli@xxxxxxxxxx>
> > > Reviewed-by: Andrea Righi <arighi@xxxxxxxxxx>
> > > Signed-off-by: Joel Fernandes <joelagnelf@xxxxxxxxxx>
> > > ---
> > > kernel/sched/debug.c | 11 ++++++++---
> > > 1 file changed, 8 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> > > index 93f009e1076d8..dd793f8f3858a 100644
> > > --- a/kernel/sched/debug.c
> > > +++ b/kernel/sched/debug.c
> > > @@ -354,6 +354,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > > return err;
> > >
> > > scoped_guard (rq_lock_irqsave, rq) {
> > > + bool is_active;
> > > +
> > > runtime = rq->fair_server.dl_runtime;
> > > period = rq->fair_server.dl_period;
> > >
> > > @@ -376,8 +378,11 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > > return -EINVAL;
> > > }
> > >
> > > - update_rq_clock(rq);
> > > - dl_server_stop(&rq->fair_server);
> > > + is_active = dl_server_active(&rq->fair_server);
> > > + if (is_active) {
> > > + update_rq_clock(rq);
> > > + dl_server_stop(&rq->fair_server);
> > > + }
> > >
> > > retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
> > >
> > > @@ -385,7 +390,7 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > > printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
> > > cpu_of(rq));
> > >
> > > - if (rq->cfs.h_nr_queued)
> > > + if (is_active && runtime)
> > > dl_server_start(&rq->fair_server);
> > >
> > > if (retval < 0)
> >
> > Suppose runtime was 0, and gets incremented while there are already
> > tasks enqueued, then the above isn't going to DTRT.
>
Right that's a bug, if the user sets runtime=0, tasks can be enqueued while
the server is disabled, when the user later sets runtime>0 to re-enable the
server, the current code doesn't start it.
Also, as discussed with Juri at LPC, we likely need a better interface than
debugfs for configuring these parameters (possibly a topic for OSPM).
> Something like so perhaps?
>
> ---
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 59e650f9d436..884bdf7a292f 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -340,7 +340,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
> long cpu = (long) ((struct seq_file *) filp->private_data)->private;
> struct rq *rq = cpu_rq(cpu);
> struct sched_dl_entity *dl_se = (struct sched_dl_entity *)server;
> - u64 runtime, period;
> + u64 old_runtime, runtime, period;
> int retval = 0;
> size_t err;
> u64 value;
> @@ -352,7 +352,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
> scoped_guard (rq_lock_irqsave, rq) {
> bool is_active;
>
> - runtime = dl_se->dl_runtime;
> + old_runtime = runtime = dl_se->dl_runtime;
> period = dl_se->dl_period;
>
> switch (param) {
> @@ -382,17 +382,20 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
>
> retval = dl_server_apply_params(dl_se, runtime, period, 0);
>
> - if (!runtime)
> - printk_deferred("%s server disabled in CPU %d, system may crash due to starvation.\n",
> - server == &rq->fair_server ? "Fair" : "Ext", cpu_of(rq));
> -
> - if (is_active && runtime)
> + if (runtime)
> dl_server_start(dl_se);
>
> if (retval < 0)
> return retval;
> }
>
> + if (!!old_runtime ^ !!runtime) {
> + pr_info("%s server %sabled in CPU %d, system may crash due to starvation.\n",
> + server == &rq->fair_server ? "Fair" : "Ext",
> + runtime ? "en" : "dis",
> + cpu_of(rq));
Or:
pr_info("%s server %sabled in CPU %d%s\n",
server == &rq->fair_server ? "Fair" : "Ext",
runtime ? "en" : "dis",
cpu_of(rq),
runtime ? "" : ", system may crash due to starvation");
> + }
> +
> *ppos += cnt;
> return cnt;
> }
I like that, it should fix the issue.
Thanks,
-Andrea