[PATCH 02/15] sched_ext: Wrap global DSQs in per-node structure
From: Tejun Heo
Date: Fri Mar 06 2026 - 14:07:09 EST
Global DSQs are currently stored as an array of scx_dispatch_q pointers,
one per NUMA node. To allow adding more per-node data structures, wrap the
global DSQ in struct scx_sched_pnode and replace global_dsqs with a pnode array.
NUMA-aware allocation is maintained. No functional changes.
Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
---
kernel/sched/ext.c | 32 ++++++++++++++++----------------
kernel/sched/ext_internal.h | 6 +++++-
2 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index fe222df1d494..9232abea4f22 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -344,7 +344,7 @@ static bool scx_is_descendant(struct scx_sched *sch, struct scx_sched *ancestor)
static struct scx_dispatch_q *find_global_dsq(struct scx_sched *sch,
struct task_struct *p)
{
- return sch->global_dsqs[cpu_to_node(task_cpu(p))];
+ return &sch->pnode[cpu_to_node(task_cpu(p))]->global_dsq;
}
static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2229,7 +2229,7 @@ static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
{
int node = cpu_to_node(cpu_of(rq));
- return consume_dispatch_q(sch, rq, sch->global_dsqs[node]);
+ return consume_dispatch_q(sch, rq, &sch->pnode[node]->global_dsq);
}
/**
@@ -4148,8 +4148,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
free_percpu(sch->pcpu);
for_each_node_state(node, N_POSSIBLE)
- kfree(sch->global_dsqs[node]);
- kfree(sch->global_dsqs);
+ kfree(sch->pnode[node]);
+ kfree(sch->pnode);
rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
do {
@@ -5707,23 +5707,23 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
if (ret < 0)
goto err_free_ei;
- sch->global_dsqs = kzalloc_objs(sch->global_dsqs[0], nr_node_ids);
- if (!sch->global_dsqs) {
+ sch->pnode = kzalloc_objs(sch->pnode[0], nr_node_ids);
+ if (!sch->pnode) {
ret = -ENOMEM;
goto err_free_hash;
}
for_each_node_state(node, N_POSSIBLE) {
- struct scx_dispatch_q *dsq;
+ struct scx_sched_pnode *pnode;
- dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
- if (!dsq) {
+ pnode = kzalloc_node(sizeof(*pnode), GFP_KERNEL, node);
+ if (!pnode) {
ret = -ENOMEM;
- goto err_free_gdsqs;
+ goto err_free_pnode;
}
- init_dsq(dsq, SCX_DSQ_GLOBAL, sch);
- sch->global_dsqs[node] = dsq;
+ init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch);
+ sch->pnode[node] = pnode;
}
sch->dsp_max_batch = ops->dispatch_max_batch ?: SCX_DSP_DFL_MAX_BATCH;
@@ -5732,7 +5732,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
__alignof__(struct scx_sched_pcpu));
if (!sch->pcpu) {
ret = -ENOMEM;
- goto err_free_gdsqs;
+ goto err_free_pnode;
}
for_each_possible_cpu(cpu)
@@ -5819,10 +5819,10 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
kthread_destroy_worker(sch->helper);
err_free_pcpu:
free_percpu(sch->pcpu);
-err_free_gdsqs:
+err_free_pnode:
for_each_node_state(node, N_POSSIBLE)
- kfree(sch->global_dsqs[node]);
- kfree(sch->global_dsqs);
+ kfree(sch->pnode[node]);
+ kfree(sch->pnode);
err_free_hash:
rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
err_free_ei:
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 4cb97093b872..9e5ebd00ea0c 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -975,6 +975,10 @@ struct scx_sched_pcpu {
struct scx_dsp_ctx dsp_ctx;
};
+struct scx_sched_pnode {
+ struct scx_dispatch_q global_dsq;
+};
+
struct scx_sched {
struct sched_ext_ops ops;
DECLARE_BITMAP(has_op, SCX_OPI_END);
@@ -988,7 +992,7 @@ struct scx_sched {
* per-node split isn't sufficient, it can be further split.
*/
struct rhashtable dsq_hash;
- struct scx_dispatch_q **global_dsqs;
+ struct scx_sched_pnode **pnode;
struct scx_sched_pcpu __percpu *pcpu;
u64 slice_dfl;
--
2.53.0