Re: [RFC PATCH v6 12/25] sched/rt: Add {alloc/unregister/free}_rt_sched_group

From: Juri Lelli

Date: Thu Jun 11 2026 - 04:46:00 EST


Hello,

On 08/06/26 14:15, Yuri Andriaccio wrote:
> Add allocation and deallocation code for rt-cgroups.
>
> Declare dl_server specific functions (only skeleton, but no
> implementation yet), needed by the deadline servers to be called when
> trying to schedule.
>
> Initialize a cgroup's active context to that of its parent.
>
> Co-developed-by: Alessio Balsini <a.balsini@xxxxxxxx>
> Signed-off-by: Alessio Balsini <a.balsini@xxxxxxxx>
> Co-developed-by: Andrea Parri <parri.andrea@xxxxxxxxx>
> Signed-off-by: Andrea Parri <parri.andrea@xxxxxxxxx>
> Co-developed-by: luca abeni <luca.abeni@xxxxxxxxxxxxxxx>
> Signed-off-by: luca abeni <luca.abeni@xxxxxxxxxxxxxxx>
> Signed-off-by: Yuri Andriaccio <yurand2000@xxxxxxxxx>
> ---

...

> void free_rt_sched_group(struct task_group *tg)
> {
> + int i;
> + unsigned long flags;
> +
> if (!rt_group_sched_enabled())
> return;
> +
> + if (!tg->dl_se || !tg->rt_rq)
> + return;
> +
> + for_each_possible_cpu(i) {
> + if (!tg->dl_se[i] || !tg->rt_rq[i])
> + continue;
> +
> + /*
> + * Shutdown the dl_server and free it
> + *
> + * Since the dl timer is going to be cancelled,
> + * we risk to never decrease the running bw...
> + * Fix this issue by changing the group runtime
> + * to 0 immediately before freeing it.
> + */
> + if (tg->dl_se[i]->dl_runtime)
> + dl_init_tg(tg->dl_se[i], 0, tg->dl_se[i]->dl_period);
> +
> + raw_spin_rq_lock_irqsave(cpu_rq(i), flags);
> + hrtimer_cancel(&tg->dl_se[i]->dl_timer);
> + raw_spin_rq_unlock_irqrestore(cpu_rq(i), flags);

Why do we need to grab rq lock here? I actually fear this can deadlock
with the timer callback.

> + kfree(tg->dl_se[i]);
> +
> + /* Free the local per-cpu runqueue */
> + kfree(rq_of_rt_rq(tg->rt_rq[i]));
> + }
> +
> + kfree(tg->rt_rq);
> + kfree(tg->dl_se);
> }
>
> +static inline void __rt_rq_free(struct rt_rq **rt_rq)
> +{
> + int i;
> +
> + for_each_possible_cpu(i) {
> + kfree(rq_of_rt_rq(rt_rq[i]));
^^
Can this result in NULL pointer deref if __alloc_rt_sched_group_data()
fails for some reason midway in the CPU loop?

> + }
> +
> + kfree(rt_rq);
> +}
> +
> +DEFINE_FREE(rt_rq_free, struct rt_rq **, if (_T) __rt_rq_free(_T))
> +
> +static inline void __dl_se_free(struct sched_dl_entity **dl_se)
> +{
> + int i;
> +
> + for_each_possible_cpu(i) {
> + kfree(dl_se[i]);
> + }
> +
> + kfree(dl_se);
> +}
> +
> +DEFINE_FREE(dl_se_free, struct sched_dl_entity **, if (_T) __dl_se_free(_T))
> +
> +static int __alloc_rt_sched_group_data(struct task_group *tg) {
> + /* Instantiate automatic cleanup in event of kalloc fail */
> + struct rt_rq **tg_rt_rq __free(rt_rq_free) = NULL;
> + struct sched_dl_entity **tg_dl_se __free(dl_se_free) = NULL;
> + struct sched_dl_entity *dl_se __free(kfree) = NULL;
> + struct rq *s_rq __free(kfree) = NULL;
> + int i;
> +
> + tg_rt_rq = kcalloc(nr_cpu_ids, sizeof(struct rt_rq *), GFP_KERNEL);
> + if (!tg_rt_rq)
> + return 0;
> +
> + tg_dl_se = kcalloc(nr_cpu_ids,
> + sizeof(struct sched_dl_entity *), GFP_KERNEL);
> + if (!tg_dl_se)
> + return 0;
> +
> + for_each_possible_cpu(i) {
> + s_rq = kzalloc_node(sizeof(struct rq),
> + GFP_KERNEL, cpu_to_node(i));
> + if (!s_rq)
> + return 0;
> +
> + dl_se = kzalloc_node(sizeof(struct sched_dl_entity),
> + GFP_KERNEL, cpu_to_node(i));
> + if (!dl_se)
> + return 0;
> +
> + tg_rt_rq[i] = &no_free_ptr(s_rq)->rt;
> + tg_dl_se[i] = no_free_ptr(dl_se);
> + }
> +
> + tg->rt_rq = no_free_ptr(tg_rt_rq);
> + tg->dl_se = no_free_ptr(tg_dl_se);
> +
> + return 1;
> +}

...

Thanks,
Juri