[PATCH v4 06/20] sched/core: allow only preferred CPUs in is_cpu_allowed
From: Shrikanth Hegde
Date: Wed Jun 17 2026 - 13:44:25 EST
When possible, pick only preferred CPUs.
The push task mechanism uses the stopper thread, which calls
select_fallback_rq and relies on this mechanism to pick only a preferred CPU.
When a task is affined only to non-preferred CPUs, it should continue to
run there. Detect that by checking whether cpus_ptr and cpu_preferred_mask
intersect.
Since is_cpu_allowed can be called directly, or repeatedly from
select_fallback_rq, encode in task_struct->has_preferred_cpu_state
whether the call path is via select_fallback_rq, along with the cached result.
This avoids N**2 complexity in the rare cases.
Signed-off-by: Shrikanth Hegde <sshegde@xxxxxxxxxxxxx>
---
v3->v4:
- Handle the previously missed PF_KTHREAD case.
- Add a new field in task_struct which encodes the intersection of the
task's affinity and the preferred CPUs, and the path the call is coming from.
include/linux/sched.h | 1 +
kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++--
kernel/sched/sched.h | 18 ++++++++++++++++++
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fc6ecb3869dd..2d0b1a6d50ac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1657,6 +1657,7 @@ struct task_struct {
#ifdef CONFIG_UNWIND_USER
struct unwind_task_info unwind_info;
#endif
+ int has_preferred_cpu_state;
/* CPU-specific state of this task: */
struct thread_struct thread;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9e16946c9d62..714816cfa975 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2500,6 +2500,8 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
*/
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
{
+ bool task_check_preferred_cpu = false;
+
/* When not in the task's cpumask, no point in looking further. */
if (!task_allowed_on_cpu(p, cpu))
return false;
@@ -2508,9 +2510,22 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
if (is_migration_disabled(p))
return cpu_online(cpu);
+ /*
+ * This is essential to maintain user affinities when the preferred
+ * CPUs change. A task pinned to a non-preferred CPU should continue
+ * to run there, since the change is not triggered by the user.
+ *
+ * If the CPU is non-preferred and the task can run on other CPUs
+ * which are currently preferred, then choose those other CPUs instead.
+ */
+ task_check_preferred_cpu = !cpu_preferred(cpu) && task_has_preferred_cpus(p);
+
/* Non kernel threads are not allowed during either online or offline. */
- if (!(p->flags & PF_KTHREAD))
+ if (!(p->flags & PF_KTHREAD)) {
+ if (task_check_preferred_cpu)
+ return false;
return cpu_active(cpu);
+ }
/* KTHREAD_IS_PER_CPU is always allowed. */
if (kthread_is_per_cpu(p))
@@ -2520,6 +2535,10 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
if (cpu_dying(cpu))
return false;
+ /* Try a preferred CPU first if possible. */
+ if (task_check_preferred_cpu)
+ return false;
+
/* But are allowed during online. */
return cpu_online(cpu);
}
@@ -3549,6 +3568,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
enum { cpuset, possible, fail } state = cpuset;
int dest_cpu;
+ /*
+ * Cache whether the task's affinity intersects the preferred CPUs.
+ * This avoids recomputing the intersection for each CPU later in
+ * the loop. Bit 0 marks that the calls to is_cpu_allowed come
+ * via select_fallback_rq.
+ */
+ p->has_preferred_cpu_state = task_has_preferred_cpus(p) << 8 | 0x1;
+
/*
* If the node that the CPU is on has been offlined, cpu_to_node()
* will return -1. There is no CPU on the node, and we should
@@ -3560,7 +3587,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
/* Look for allowed, online CPU in same node. */
for_each_cpu(dest_cpu, nodemask) {
if (is_cpu_allowed(p, dest_cpu))
- return dest_cpu;
+ goto clear_and_return;
}
}
@@ -3604,6 +3631,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}
}
+clear_and_return:
+ p->has_preferred_cpu_state = 0;
return dest_cpu;
}
@@ -4612,6 +4641,7 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
init_numa_balancing(clone_flags, p);
p->wake_entry.u_flags = CSD_TYPE_TTWU;
p->migration_pending = NULL;
+ p->has_preferred_cpu_state = 0;
init_sched_mm(p);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c7c2dea65edd..38fd84b0b8f8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4213,4 +4213,22 @@ DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
#include "ext.h"
+/*
+ * has_preferred_cpu_state encodes two pieces of information.
+ * The lowest byte (bit 0) records whether the call to is_cpu_allowed
+ * comes from select_fallback_rq. The second byte (bit 8) caches whether
+ * the task's affinity intersects cpu_preferred_mask.
+ *
+ * If bit 0 is set, return the cached intersection from a single snapshot
+ * of the field; otherwise compute it from the task's current cpus_ptr.
+ */
+static inline bool task_has_preferred_cpus(struct task_struct *p)
+{
+	int cached_value = p->has_preferred_cpu_state;
+
+	if (cached_value & 0x1)
+		return cached_value >> 8;
+
+	return cpumask_intersects(p->cpus_ptr, cpu_preferred_mask);
+}
#endif /* _KERNEL_SCHED_SCHED_H */
--
2.47.3