[PATCH 3/3] sched_ext: get rid of the scx_selcpu_topo_numa logic

From: Andrea Righi
Date: Tue Dec 03 2024 - 10:56:15 EST


With the introduction of separate per-NUMA node cpumasks, we
automatically track idle CPUs within each NUMA node.

This makes the special logic for determining idle CPUs in each NUMA node
redundant, so we can get rid of it.

Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
---
kernel/sched/ext.c | 57 +++++-----------------------------------------
1 file changed, 6 insertions(+), 51 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index cff4210e9c7b..6a91d0f5d2a3 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -886,7 +886,6 @@ static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);

#ifdef CONFIG_SMP
static DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_llc);
-static DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_numa);
#endif

static struct static_key_false scx_has_op[SCX_OPI_END] =
@@ -3235,10 +3234,9 @@ static s32 scx_pick_idle_cpu_from_node(int node, const struct cpumask *cpus_allo

}

-static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags)
+static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, s32 prev_cpu, u64 flags)
{
- int start = cpu_to_node(smp_processor_id());
- int node, cpu;
+ int node, cpu, start = cpu_to_node(prev_cpu);

for_each_node_state_wrap(node, N_ONLINE, start) {
/*
@@ -3319,9 +3317,8 @@ static bool llc_numa_mismatch(void)
*/
static void update_selcpu_topology(void)
{
- bool enable_llc = false, enable_numa = false;
+ bool enable_llc = false;
struct sched_domain *sd;
- const struct cpumask *cpus;
s32 cpu = cpumask_first(cpu_online_mask);

/*
@@ -3337,37 +3334,18 @@ static void update_selcpu_topology(void)
rcu_read_lock();
sd = rcu_dereference(per_cpu(sd_llc, cpu));
if (sd) {
- if (sd->span_weight < num_online_cpus())
+ if ((sd->span_weight < num_online_cpus()) && llc_numa_mismatch())
enable_llc = true;
}
-
- /*
- * Enable NUMA optimization only when there are multiple NUMA domains
- * among the online CPUs and the NUMA domains don't perfectly overlaps
- * with the LLC domains.
- *
- * If all CPUs belong to the same NUMA node and the same LLC domain,
- * enabling both NUMA and LLC optimizations is unnecessary, as checking
- * for an idle CPU in the same domain twice is redundant.
- */
- cpus = cpumask_of_node(cpu_to_node(cpu));
- if ((cpumask_weight(cpus) < num_online_cpus()) && llc_numa_mismatch())
- enable_numa = true;
rcu_read_unlock();

pr_debug("sched_ext: LLC idle selection %s\n",
enable_llc ? "enabled" : "disabled");
- pr_debug("sched_ext: NUMA idle selection %s\n",
- enable_numa ? "enabled" : "disabled");

if (enable_llc)
static_branch_enable_cpuslocked(&scx_selcpu_topo_llc);
else
static_branch_disable_cpuslocked(&scx_selcpu_topo_llc);
- if (enable_numa)
- static_branch_enable_cpuslocked(&scx_selcpu_topo_numa);
- else
- static_branch_disable_cpuslocked(&scx_selcpu_topo_numa);
}

/*
@@ -3388,9 +3366,8 @@ static void update_selcpu_topology(void)
* 4. Pick a CPU within the same NUMA node, if enabled:
* - choose a CPU from the same NUMA node to reduce memory access latency.
*
- * Step 3 and 4 are performed only if the system has, respectively, multiple
- * LLC domains / multiple NUMA nodes (see scx_selcpu_topo_llc and
- * scx_selcpu_topo_numa).
+ * Step 3 is performed only if the system has multiple LLC domains that do not
+ * perfectly overlap with the NUMA domains (see scx_selcpu_topo_llc).
*
* NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
* we never call ops.select_cpu() for them, see select_task_rq().
@@ -3399,7 +3376,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *found)
{
const struct cpumask *llc_cpus = NULL;
- const struct cpumask *numa_cpus = NULL;
int node = cpu_to_node(prev_cpu);
s32 cpu;

@@ -3422,9 +3398,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
* defined by user-space.
*/
if (p->nr_cpus_allowed >= num_possible_cpus()) {
- if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
- numa_cpus = p->cpus_ptr;
-
if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
struct sched_domain *sd;

@@ -3494,15 +3467,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
goto cpu_found;
}

- /*
- * Search for any fully idle core in the same NUMA node.
- */
- if (numa_cpus) {
- cpu = scx_pick_idle_cpu_from_node(node, numa_cpus, SCX_PICK_IDLE_CORE);
- if (cpu >= 0)
- goto cpu_found;
- }
-
/*
* Search for any full idle core usable by the task.
*/
@@ -3528,15 +3492,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
goto cpu_found;
}

- /*
- * Search for any idle CPU in the same NUMA node.
- */
- if (numa_cpus) {
- cpu = scx_pick_idle_cpu_from_node(node, numa_cpus, 0);
- if (cpu >= 0)
- goto cpu_found;
- }
-
/*
* Search for any idle CPU usable by the task.
*/
--
2.47.1