[Patch v4 15/22] sched/cache: Respect LLC preference in task migration and detach

From: Tim Chen

Date: Wed Apr 01 2026 - 17:48:21 EST


During load balancing, make can_migrate_task()
consider a task's LLC preference.
Prevent a task from being moved out of its preferred LLC.

During regular load balancing, if a task cannot be migrated
due to LLC locality, nr_balance_failed should also not be
increased.

Suggested-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Suggested-by: K Prateek Nayak <kprateek.nayak@xxxxxxx>
Co-developed-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---

Notes:
v3->v4:
Fix the bug in migrate_degrades_llc() that p->preferred_llc should
be used for comparison rather than task_llc(p).
(Madadi Vineeth Reddy)

Let nr_balance_failed overwrite cache-aware migration if the
former is too high. (Peter Zijlstra, K Prateek Nayak)

kernel/sched/fair.c | 83 +++++++++++++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 13 +++++++
2 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fef916afa1d5..9541e94370e7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9777,6 +9777,7 @@ enum migration_type {
#define LBF_DST_PINNED 0x04
#define LBF_SOME_PINNED 0x08
#define LBF_ACTIVE_LB 0x10
+#define LBF_LLC_PINNED 0x20

struct lb_env {
struct sched_domain *sd;
@@ -10089,8 +10090,8 @@ static enum llc_mig can_migrate_llc(int src_cpu, int dst_cpu,
* Check if task p can migrate from source LLC to
* destination LLC in terms of cache aware load balance.
*/
-static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
- struct task_struct *p)
+static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
+ struct task_struct *p)
{
struct mm_struct *mm;
bool to_pref;
@@ -10157,6 +10158,46 @@ alb_break_llc(struct lb_env *env)

return false;
}
+
+/*
+ * Check if migrating task p from env->src_cpu to
+ * env->dst_cpu breaks LLC locality.
+ */
+static bool migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+ if (!sched_cache_enabled())
+ return false;
+
+ if (task_has_sched_core(p))
+ return false;
+ /*
+ * Skip over tasks that would degrade LLC locality;
+ * only when nr_balance_failed is sufficiently high do we
+ * ignore this constraint.
+ *
+ * Threshold of cache_nice_tries is set to 1 higher
+ * than nr_balance_failed to avoid excessive task
+ * migration at the same time.
+ */
+ if (env->sd->nr_balance_failed >= env->sd->cache_nice_tries + 1)
+ return false;
+
+ /*
+ * We know that env->src_cpu has some tasks that prefer to
+ * run on env->dst_cpu; skip the tasks that do not prefer
+ * env->dst_cpu, and find one that does.
+ */
+ if (env->migration_type == migrate_llc_task &&
+ READ_ONCE(p->preferred_llc) != llc_id(env->dst_cpu))
+ return true;
+
+ if (can_migrate_llc_task(env->src_cpu,
+ env->dst_cpu, p) != mig_forbid)
+ return false;
+
+ return true;
+}
+
#else
static inline bool get_llc_stats(int cpu, unsigned long *util,
unsigned long *cap)
@@ -10169,6 +10210,12 @@ alb_break_llc(struct lb_env *env)
{
return false;
}
+
+static inline bool
+migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+ return false;
+}
#endif
/*
* can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
@@ -10266,10 +10313,29 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 1;

degrades = migrate_degrades_locality(p, env);
- if (!degrades)
+ if (!degrades) {
+ /*
+ * If the NUMA locality is not broken,
+ * further check if migration would hurt
+ * LLC locality.
+ */
+ if (migrate_degrades_llc(p, env)) {
+ /*
+ * If regular load balancing fails to pull a task
+ * due to LLC locality, this is expected behavior
+ * and we set LBF_LLC_PINNED so we don't increase
+ * nr_balance_failed unnecessarily.
+ */
+ if (env->migration_type != migrate_llc_task)
+ env->flags |= LBF_LLC_PINNED;
+
+ return 0;
+ }
+
hot = task_hot(p, env);
- else
+ } else {
hot = degrades > 0;
+ }

if (!hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
if (hot)
@@ -12910,9 +12976,16 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
*
* Similarly for migration_misfit which is not related to
* load/util migration, don't pollute nr_balance_failed.
+ *
+ * The same for cache aware scheduling's allowance for
+ * load imbalance. If regular load balance does not
+ * migrate a task due to LLC locality, it is an expected
+ * behavior and don't pollute nr_balance_failed.
+ * See can_migrate_task().
*/
if (idle != CPU_NEWLY_IDLE &&
- env.migration_type != migrate_misfit)
+ env.migration_type != migrate_misfit &&
+ !(env.flags & LBF_LLC_PINNED))
sd->nr_balance_failed++;

if (need_active_balance(&env)) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 081f23a48414..511c85572b96 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1546,6 +1546,14 @@ extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
extern void sched_core_get(void);
extern void sched_core_put(void);

+static inline bool task_has_sched_core(struct task_struct *p)
+{
+ if (sched_core_disabled())
+ return false;
+
+ return !!p->core_cookie;
+}
+
#else /* !CONFIG_SCHED_CORE: */

static inline bool sched_core_enabled(struct rq *rq)
@@ -1586,6 +1594,11 @@ static inline bool sched_group_cookie_match(struct rq *rq,
return true;
}

+static inline bool task_has_sched_core(struct task_struct *p)
+{
+ return false;
+}
+
#endif /* !CONFIG_SCHED_CORE */

#ifdef CONFIG_RT_GROUP_SCHED
--
2.32.0