[PATCH v3 08/21] sched/cache: Calculate the percpu sd task LLC preference
From: Tim Chen
Date: Tue Feb 10 2026 - 17:15:29 EST
Track the number of tasks preferring each LLC on every runqueue.
These statistics are computed during task enqueue and dequeue
operations, and are used by cache-aware load balancing.
Co-developed-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Chen Yu <yu.c.chen@xxxxxxxxx>
Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
---
Notes:
v2->v3: Move the max_llcs check from patch 4 to this patch.
This clarifies the rationale for the
max_llcs check and makes review easier (Peter Zijlstra).
kernel/sched/fair.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 54 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6ad9ad2f918f..4a98aa866d65 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1199,28 +1199,80 @@ static int llc_id(int cpu)
return per_cpu(sd_llc_id, cpu);
}
+static inline bool valid_llc_id(int id)
+{
+ if (unlikely(id < 0 || id >= max_llcs))
+ return false;
+
+ return true;
+}
+
+static inline bool valid_llc_buf(struct sched_domain *sd,
+ int id)
+{
+ /*
+ * The check for sd and its corresponding pf is to
+ * confirm that the sd->pf[] has been allocated in
+ * build_sched_domains() after the assignment of
+ * per_cpu(sd_llc_id, i). This is used to avoid
+ * the race condition.
+ */
+ if (unlikely(!sd || !sd->pf))
+ return false;
+
+ return valid_llc_id(id);
+}
+
static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
{
+ struct sched_domain *sd;
int pref_llc;
pref_llc = p->preferred_llc;
- if (pref_llc < 0)
+ if (!valid_llc_id(pref_llc))
return;
rq->nr_llc_running++;
rq->nr_pref_llc_running += (pref_llc == task_llc(p));
+
+ scoped_guard (rcu) {
+ sd = rcu_dereference(rq->sd);
+ if (valid_llc_buf(sd, pref_llc))
+ sd->pf[pref_llc]++;
+ }
}
static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
{
+ struct sched_domain *sd;
int pref_llc;
pref_llc = p->preferred_llc;
- if (pref_llc < 0)
+ if (!valid_llc_id(pref_llc))
return;
rq->nr_llc_running--;
rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
+
+ scoped_guard (rcu) {
+ sd = rcu_dereference(rq->sd);
+ if (valid_llc_buf(sd, pref_llc)) {
+ /*
+ * There is a race condition between dequeue
+ * and CPU hotplug. After a task has been enqueued
+ * on CPUx, a CPU hotplug event occurs, and all online
+ * CPUs (including CPUx) rebuild their sched_domains
+ * and reset statistics to zero (including sd->pf).
+ * This can cause temporary undercount and we have to
+ * check for such underflow in sd->pf.
+ *
+ * This undercount is temporary and accurate accounting
+ * will resume once the rq has a chance to be idle.
+ */
+ if (sd->pf[pref_llc])
+ sd->pf[pref_llc]--;
+ }
+ }
}
void mm_init_sched(struct mm_struct *mm,
--
2.32.0