Re: [PATCH 02/15] sched_ext: Wrap global DSQs in per-node structure
From: Emil Tsalapatis
Date: Fri Mar 06 2026 - 15:53:03 EST
On Fri Mar 6, 2026 at 2:06 PM EST, Tejun Heo wrote:
> Global DSQs are currently stored as an array of scx_dispatch_q pointers,
> one per NUMA node. To allow adding more per-node data structures, wrap the
> global DSQ in scx_sched_pnode and replace global_dsqs with a pnode array.
>
> NUMA-aware allocation is maintained. No functional changes.
>
> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Reviewed-by: Emil Tsalapatis <emil@xxxxxxxxxxxxxxx>
> ---
> kernel/sched/ext.c | 32 ++++++++++++++++----------------
> kernel/sched/ext_internal.h | 6 +++++-
> 2 files changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index fe222df1d494..9232abea4f22 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -344,7 +344,7 @@ static bool scx_is_descendant(struct scx_sched *sch, struct scx_sched *ancestor)
> static struct scx_dispatch_q *find_global_dsq(struct scx_sched *sch,
> struct task_struct *p)
> {
> - return sch->global_dsqs[cpu_to_node(task_cpu(p))];
> + return &sch->pnode[cpu_to_node(task_cpu(p))]->global_dsq;
> }
>
> static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
> @@ -2229,7 +2229,7 @@ static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
> {
> int node = cpu_to_node(cpu_of(rq));
>
> - return consume_dispatch_q(sch, rq, sch->global_dsqs[node]);
> + return consume_dispatch_q(sch, rq, &sch->pnode[node]->global_dsq);
> }
>
> /**
> @@ -4148,8 +4148,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
> free_percpu(sch->pcpu);
>
> for_each_node_state(node, N_POSSIBLE)
> - kfree(sch->global_dsqs[node]);
> - kfree(sch->global_dsqs);
> + kfree(sch->pnode[node]);
> + kfree(sch->pnode);
>
> rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
> do {
> @@ -5707,23 +5707,23 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
> if (ret < 0)
> goto err_free_ei;
>
> - sch->global_dsqs = kzalloc_objs(sch->global_dsqs[0], nr_node_ids);
> - if (!sch->global_dsqs) {
> + sch->pnode = kzalloc_objs(sch->pnode[0], nr_node_ids);
> + if (!sch->pnode) {
> ret = -ENOMEM;
> goto err_free_hash;
> }
>
> for_each_node_state(node, N_POSSIBLE) {
> - struct scx_dispatch_q *dsq;
> + struct scx_sched_pnode *pnode;
>
> - dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
> - if (!dsq) {
> + pnode = kzalloc_node(sizeof(*pnode), GFP_KERNEL, node);
> + if (!pnode) {
> ret = -ENOMEM;
> - goto err_free_gdsqs;
> + goto err_free_pnode;
> }
>
> - init_dsq(dsq, SCX_DSQ_GLOBAL, sch);
> - sch->global_dsqs[node] = dsq;
> + init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch);
> + sch->pnode[node] = pnode;
> }
>
> sch->dsp_max_batch = ops->dispatch_max_batch ?: SCX_DSP_DFL_MAX_BATCH;
> @@ -5732,7 +5732,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
> __alignof__(struct scx_sched_pcpu));
> if (!sch->pcpu) {
> ret = -ENOMEM;
> - goto err_free_gdsqs;
> + goto err_free_pnode;
> }
>
> for_each_possible_cpu(cpu)
> @@ -5819,10 +5819,10 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
> kthread_destroy_worker(sch->helper);
> err_free_pcpu:
> free_percpu(sch->pcpu);
> -err_free_gdsqs:
> +err_free_pnode:
> for_each_node_state(node, N_POSSIBLE)
> - kfree(sch->global_dsqs[node]);
> - kfree(sch->global_dsqs);
> + kfree(sch->pnode[node]);
> + kfree(sch->pnode);
> err_free_hash:
> rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
> err_free_ei:
> diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
> index 4cb97093b872..9e5ebd00ea0c 100644
> --- a/kernel/sched/ext_internal.h
> +++ b/kernel/sched/ext_internal.h
> @@ -975,6 +975,10 @@ struct scx_sched_pcpu {
> struct scx_dsp_ctx dsp_ctx;
> };
>
> +struct scx_sched_pnode {
> + struct scx_dispatch_q global_dsq;
> +};
> +
> struct scx_sched {
> struct sched_ext_ops ops;
> DECLARE_BITMAP(has_op, SCX_OPI_END);
> @@ -988,7 +992,7 @@ struct scx_sched {
> * per-node split isn't sufficient, it can be further split.
> */
> struct rhashtable dsq_hash;
> - struct scx_dispatch_q **global_dsqs;
> + struct scx_sched_pnode **pnode;
> struct scx_sched_pcpu __percpu *pcpu;
>
> u64 slice_dfl;